Diffstat (limited to 'drivers/staging/lustre/lnet')
67 files changed, 16875 insertions, 5157 deletions
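The dominant functional change in the o2iblnd hunks that follow is that connection sizing moves from version-keyed, compile-time macros to values carried on each peer and connection: peers gain ibp_queue_depth and ibp_max_frags, connections gain ibc_queue_depth and ibc_max_frags, and IBLND_RX_MSGS(), IBLND_SEND_WRS() and IBLND_CQ_ENTRIES() are re-keyed from a protocol version to a connection pointer. Below is a minimal, self-contained C sketch of that sizing arithmetic, condensed from the new macros in o2iblnd.h; the conn_sketch struct and helper names are illustrative only, not part of the driver.

    /* Sketch of the per-connection sizing this series introduces.
     * Assumes a version-2 peer, for which IBLND_OOB_MSGS() yields 2. */
    #include <stdio.h>

    #define OOB_MSGS 2                       /* cf. IBLND_OOB_MSGS(v) */

    struct conn_sketch {
            unsigned short queue_depth;      /* cf. ibc_queue_depth */
            unsigned short max_frags;        /* cf. ibc_max_frags */
    };

    /* cf. IBLND_RX_MSGS(c): two receives per credit plus OOB slots */
    static int rx_msgs(const struct conn_sketch *c)
    {
            return c->queue_depth * 2 + OOB_MSGS;
    }

    /* cf. IBLND_SEND_WRS(c): one WR per RDMA fragment plus one,
     * for each concurrent send */
    static int send_wrs(const struct conn_sketch *c, int concurrent_sends)
    {
            return (c->max_frags + 1) * concurrent_sends;
    }

    /* cf. IBLND_CQ_ENTRIES(c) = IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c) */
    static int cq_entries(const struct conn_sketch *c, int concurrent_sends)
    {
            return rx_msgs(c) + send_wrs(c, concurrent_sends);
    }

    int main(void)
    {
            struct conn_sketch c = { .queue_depth = 8, .max_frags = 256 };

            printf("rx_msgs=%d cq_entries=%d\n",
                   rx_msgs(&c), cq_entries(&c, 8));
            return 0;
    }

Keying these budgets on negotiated per-connection values rather than on the protocol version lets two peers agree on a smaller queue depth or fragment count (kiblnd_create_peer seeds them from kib_peertxcredits and IBLND_CFG_RDMA_FRAGS) without over-allocating work requests and CQ entries for every connection.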
diff --git a/drivers/staging/lustre/lnet/Kconfig b/drivers/staging/lustre/lnet/Kconfig index 00850eeb6..2b5930150 100644 --- a/drivers/staging/lustre/lnet/Kconfig +++ b/drivers/staging/lustre/lnet/Kconfig @@ -1,10 +1,16 @@ config LNET - tristate "Lustre networking subsystem" - depends on LUSTRE_FS + tristate "Lustre networking subsystem (LNet)" + depends on INET && m + help + The Lustre network layer, also known as LNet, is a networking abstraction + layer API that was initially created to allow the Lustre filesystem to use + very different networks, such as TCP and IB verbs, in a uniform way. In the + case of Lustre routers, only the LNet layer is required. Other projects are + also looking into using LNet as their networking API. config LNET_MAX_PAYLOAD - int "Lustre lnet max transfer payload (default 2MB)" - depends on LUSTRE_FS + int "Lustre lnet max transfer payload (default 1MB)" + depends on LNET default "1048576" help This option defines the maximum size of payload in bytes that lnet diff --git a/drivers/staging/lustre/lnet/Makefile b/drivers/staging/lustre/lnet/Makefile index f6f03e304..0a380fe88 100644 --- a/drivers/staging/lustre/lnet/Makefile +++ b/drivers/staging/lustre/lnet/Makefile @@ -1 +1 @@ -obj-$(CONFIG_LNET) += lnet/ klnds/ selftest/ +obj-$(CONFIG_LNET) += libcfs/ lnet/ klnds/ selftest/ diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c index cb74ae731..0d32e6541 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c @@ -42,15 +42,7 @@ #include <asm/page.h> #include "o2iblnd.h" -static lnd_t the_o2iblnd = { - .lnd_type = O2IBLND, - .lnd_startup = kiblnd_startup, - .lnd_shutdown = kiblnd_shutdown, - .lnd_ctl = kiblnd_ctl, - .lnd_query = kiblnd_query, - .lnd_send = kiblnd_send, - .lnd_recv = kiblnd_recv, -}; +static lnd_t the_o2iblnd; kib_data_t kiblnd_data; @@ -63,7 +55,7 @@ static __u32 kiblnd_cksum(void *ptr, int nob) sum = ((sum << 1) | (sum >> 31)) + *c++; /* ensure I don't return 0 (== no checksum) */ - return (sum == 0) ? 1 : sum; + return !sum ? 1 : sum; } static char *kiblnd_msgtype2str(int type) @@ -145,7 +137,7 @@ static int kiblnd_unpack_rd(kib_msg_t *msg, int flip) int i; LASSERT(msg->ibm_type == IBLND_MSG_GET_REQ || - msg->ibm_type == IBLND_MSG_PUT_ACK); + msg->ibm_type == IBLND_MSG_PUT_ACK); rd = msg->ibm_type == IBLND_MSG_GET_REQ ? &msg->ibm_u.get.ibgm_rd : @@ -189,8 +181,10 @@ void kiblnd_pack_msg(lnet_ni_t *ni, kib_msg_t *msg, int version, { kib_net_t *net = ni->ni_data; - /* CAVEAT EMPTOR! all message fields not set here should have been - * initialised previously. */ + /* + * CAVEAT EMPTOR! all message fields not set here should have been + * initialised previously. + */ msg->ibm_magic = IBLND_MSG_MAGIC; msg->ibm_version = version; /* ibm_type */ @@ -249,11 +243,13 @@ int kiblnd_unpack_msg(kib_msg_t *msg, int nob) return -EPROTO; } - /* checksum must be computed with ibm_cksum zero and BEFORE anything - * gets flipped */ + /* + * checksum must be computed with ibm_cksum zero and BEFORE anything + * gets flipped + */ msg_cksum = flip ?
__swab32(msg->ibm_cksum) : msg->ibm_cksum; msg->ibm_cksum = 0; - if (msg_cksum != 0 && + if (msg_cksum && msg_cksum != kiblnd_cksum(msg, msg_nob)) { CERROR("Bad checksum\n"); return -EPROTO; @@ -326,21 +322,21 @@ int kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid) int cpt = lnet_cpt_of_nid(nid); unsigned long flags; - LASSERT(net != NULL); + LASSERT(net); LASSERT(nid != LNET_NID_ANY); LIBCFS_CPT_ALLOC(peer, lnet_cpt_table(), cpt, sizeof(*peer)); - if (peer == NULL) { + if (!peer) { CERROR("Cannot allocate peer\n"); return -ENOMEM; } - memset(peer, 0, sizeof(*peer)); /* zero flags etc */ - peer->ibp_ni = ni; peer->ibp_nid = nid; peer->ibp_error = 0; peer->ibp_last_alive = 0; + peer->ibp_max_frags = IBLND_CFG_RDMA_FRAGS; + peer->ibp_queue_depth = *kiblnd_tunables.kib_peertxcredits; atomic_set(&peer->ibp_refcount, 1); /* 1 ref for caller */ INIT_LIST_HEAD(&peer->ibp_list); /* not in the peer table yet */ @@ -350,7 +346,7 @@ int kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid) write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); /* always called with a ref on ni, which prevents ni being shutdown */ - LASSERT(net->ibn_shutdown == 0); + LASSERT(!net->ibn_shutdown); /* npeers only grows with the global lock held */ atomic_inc(&net->ibn_npeers); @@ -365,38 +361,36 @@ void kiblnd_destroy_peer(kib_peer_t *peer) { kib_net_t *net = peer->ibp_ni->ni_data; - LASSERT(net != NULL); - LASSERT(atomic_read(&peer->ibp_refcount) == 0); + LASSERT(net); + LASSERT(!atomic_read(&peer->ibp_refcount)); LASSERT(!kiblnd_peer_active(peer)); - LASSERT(peer->ibp_connecting == 0); - LASSERT(peer->ibp_accepting == 0); - LASSERT(list_empty(&peer->ibp_conns)); + LASSERT(kiblnd_peer_idle(peer)); LASSERT(list_empty(&peer->ibp_tx_queue)); LIBCFS_FREE(peer, sizeof(*peer)); - /* NB a peer's connections keep a reference on their peer until + /* + * NB a peer's connections keep a reference on their peer until * they are destroyed, so we can be assured that _all_ state to do * with this peer has been cleaned up when its refcount drops to - * zero. */ + * zero. + */ atomic_dec(&net->ibn_npeers); } kib_peer_t *kiblnd_find_peer_locked(lnet_nid_t nid) { - /* the caller is responsible for accounting the additional reference - * that this creates */ + /* + * the caller is responsible for accounting the additional reference + * that this creates + */ struct list_head *peer_list = kiblnd_nid2peerlist(nid); struct list_head *tmp; kib_peer_t *peer; list_for_each(tmp, peer_list) { - peer = list_entry(tmp, kib_peer_t, ibp_list); - - LASSERT(peer->ibp_connecting > 0 || /* creating conns */ - peer->ibp_accepting > 0 || - !list_empty(&peer->ibp_conns)); /* active conn */ + LASSERT(!kiblnd_peer_idle(peer)); if (peer->ibp_nid != nid) continue; @@ -431,13 +425,9 @@ static int kiblnd_get_peer_info(lnet_ni_t *ni, int index, read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) { - list_for_each(ptmp, &kiblnd_data.kib_peers[i]) { - peer = list_entry(ptmp, kib_peer_t, ibp_list); - LASSERT(peer->ibp_connecting > 0 || - peer->ibp_accepting > 0 || - !list_empty(&peer->ibp_conns)); + LASSERT(!kiblnd_peer_idle(peer)); if (peer->ibp_ni != ni) continue; @@ -474,8 +464,10 @@ static void kiblnd_del_peer_locked(kib_peer_t *peer) } /* NB closing peer's last conn unlinked it. */ } - /* NB peer now unlinked; might even be freed if the peer table had the - * last ref on it. 
*/ + /* + * NB peer now unlinked; might even be freed if the peer table had the + * last ref on it. + */ } static int kiblnd_del_peer(lnet_ni_t *ni, lnet_nid_t nid) @@ -493,7 +485,8 @@ static int kiblnd_del_peer(lnet_ni_t *ni, lnet_nid_t nid) write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); if (nid != LNET_NID_ANY) { - lo = hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; + lo = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; + hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; } else { lo = 0; hi = kiblnd_data.kib_peer_hash_size - 1; @@ -502,9 +495,7 @@ static int kiblnd_del_peer(lnet_ni_t *ni, lnet_nid_t nid) for (i = lo; i <= hi; i++) { list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) { peer = list_entry(ptmp, kib_peer_t, ibp_list); - LASSERT(peer->ibp_connecting > 0 || - peer->ibp_accepting > 0 || - !list_empty(&peer->ibp_conns)); + LASSERT(!kiblnd_peer_idle(peer)); if (peer->ibp_ni != ni) continue; @@ -516,7 +507,7 @@ static int kiblnd_del_peer(lnet_ni_t *ni, lnet_nid_t nid) LASSERT(list_empty(&peer->ibp_conns)); list_splice_init(&peer->ibp_tx_queue, - &zombies); + &zombies); } kiblnd_del_peer_locked(peer); @@ -544,11 +535,8 @@ static kib_conn_t *kiblnd_get_conn_by_idx(lnet_ni_t *ni, int index) for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) { list_for_each(ptmp, &kiblnd_data.kib_peers[i]) { - peer = list_entry(ptmp, kib_peer_t, ibp_list); - LASSERT(peer->ibp_connecting > 0 || - peer->ibp_accepting > 0 || - !list_empty(&peer->ibp_conns)); + LASSERT(!kiblnd_peer_idle(peer)); if (peer->ibp_ni != ni) continue; @@ -558,7 +546,7 @@ static kib_conn_t *kiblnd_get_conn_by_idx(lnet_ni_t *ni, int index) continue; conn = list_entry(ctmp, kib_conn_t, - ibc_list); + ibc_list); kiblnd_conn_addref(conn); read_unlock_irqrestore( &kiblnd_data.kib_global_lock, @@ -597,12 +585,12 @@ static void kiblnd_setup_mtu_locked(struct rdma_cm_id *cmid) int mtu; /* XXX There is no path record for iWARP, set by netdev->change_mtu? */ - if (cmid->route.path_rec == NULL) + if (!cmid->route.path_rec) return; mtu = kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu); LASSERT(mtu >= 0); - if (mtu != 0) + if (mtu) cmid->route.path_rec->mtu = mtu; } @@ -619,13 +607,13 @@ static int kiblnd_get_completion_vector(kib_conn_t *conn, int cpt) return 0; mask = cfs_cpt_cpumask(lnet_cpt_table(), cpt); - if (mask == NULL) + if (!mask) return 0; /* hash NID to CPU id in this partition... */ off = do_div(nid, cpumask_weight(mask)); for_each_cpu(i, mask) { - if (off-- == 0) + if (!off--) return i % vectors; } @@ -634,15 +622,17 @@ static int kiblnd_get_completion_vector(kib_conn_t *conn, int cpt) } kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid, - int state, int version) + int state, int version) { - /* CAVEAT EMPTOR: + /* + * CAVEAT EMPTOR: * If the new conn is created successfully it takes over the caller's * ref on 'peer'. It also "owns" 'cmid' and destroys it when it itself * is destroyed. On failure, the caller's ref on 'peer' remains and * she must dispose of 'cmid'. (Actually I'd block forever if I tried * to destroy 'cmid' here since I'm called from the CM which still has - * its ref on 'cmid'). */ + * its ref on 'cmid'). 
+ */ rwlock_t *glock = &kiblnd_data.kib_global_lock; kib_net_t *net = peer->ibp_ni->ni_data; kib_dev_t *dev; @@ -656,7 +646,7 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid, int rc; int i; - LASSERT(net != NULL); + LASSERT(net); LASSERT(!in_interrupt()); dev = net->ibn_dev; @@ -668,14 +658,14 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid, LIBCFS_CPT_ALLOC(init_qp_attr, lnet_cpt_table(), cpt, sizeof(*init_qp_attr)); - if (init_qp_attr == NULL) { + if (!init_qp_attr) { CERROR("Can't allocate qp_attr for %s\n", libcfs_nid2str(peer->ibp_nid)); goto failed_0; } LIBCFS_CPT_ALLOC(conn, lnet_cpt_table(), cpt, sizeof(*conn)); - if (conn == NULL) { + if (!conn) { CERROR("Can't allocate connection for %s\n", libcfs_nid2str(peer->ibp_nid)); goto failed_1; @@ -686,6 +676,8 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid, conn->ibc_peer = peer; /* I take the caller's ref */ cmid->context = conn; /* for future CM callbacks */ conn->ibc_cmid = cmid; + conn->ibc_max_frags = peer->ibp_max_frags; + conn->ibc_queue_depth = peer->ibp_queue_depth; INIT_LIST_HEAD(&conn->ibc_early_rxs); INIT_LIST_HEAD(&conn->ibc_tx_noops); @@ -697,7 +689,7 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid, LIBCFS_CPT_ALLOC(conn->ibc_connvars, lnet_cpt_table(), cpt, sizeof(*conn->ibc_connvars)); - if (conn->ibc_connvars == NULL) { + if (!conn->ibc_connvars) { CERROR("Can't allocate in-progress connection state\n"); goto failed_2; } @@ -731,42 +723,42 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid, write_unlock_irqrestore(glock, flags); LIBCFS_CPT_ALLOC(conn->ibc_rxs, lnet_cpt_table(), cpt, - IBLND_RX_MSGS(version) * sizeof(kib_rx_t)); - if (conn->ibc_rxs == NULL) { + IBLND_RX_MSGS(conn) * sizeof(kib_rx_t)); + if (!conn->ibc_rxs) { CERROR("Cannot allocate RX buffers\n"); goto failed_2; } rc = kiblnd_alloc_pages(&conn->ibc_rx_pages, cpt, - IBLND_RX_MSG_PAGES(version)); - if (rc != 0) + IBLND_RX_MSG_PAGES(conn)); + if (rc) goto failed_2; kiblnd_map_rx_descs(conn); - cq_attr.cqe = IBLND_CQ_ENTRIES(version); + cq_attr.cqe = IBLND_CQ_ENTRIES(conn); cq_attr.comp_vector = kiblnd_get_completion_vector(conn, cpt); cq = ib_create_cq(cmid->device, kiblnd_cq_completion, kiblnd_cq_event, conn, &cq_attr); if (IS_ERR(cq)) { - CERROR("Can't create CQ: %ld, cqe: %d\n", - PTR_ERR(cq), IBLND_CQ_ENTRIES(version)); + CERROR("Failed to create CQ with %d CQEs: %ld\n", + IBLND_CQ_ENTRIES(conn), PTR_ERR(cq)); goto failed_2; } conn->ibc_cq = cq; rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); - if (rc != 0) { - CERROR("Can't request completion notificiation: %d\n", rc); + if (rc) { + CERROR("Can't request completion notification: %d\n", rc); goto failed_2; } init_qp_attr->event_handler = kiblnd_qp_event; init_qp_attr->qp_context = conn; - init_qp_attr->cap.max_send_wr = IBLND_SEND_WRS(version); - init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS(version); + init_qp_attr->cap.max_send_wr = IBLND_SEND_WRS(conn); + init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS(conn); init_qp_attr->cap.max_send_sge = 1; init_qp_attr->cap.max_recv_sge = 1; init_qp_attr->sq_sig_type = IB_SIGNAL_REQ_WR; @@ -777,7 +769,7 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid, conn->ibc_sched = sched; rc = rdma_create_qp(cmid, conn->ibc_hdev->ibh_pd, init_qp_attr); - if (rc != 0) { + if (rc) { CERROR("Can't create QP: %d, send_wr: %d, recv_wr: %d\n", rc, init_qp_attr->cap.max_send_wr, init_qp_attr->cap.max_recv_wr); @@ -787,33 
+779,37 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid, LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr)); /* 1 ref for caller and each rxmsg */ - atomic_set(&conn->ibc_refcount, 1 + IBLND_RX_MSGS(version)); - conn->ibc_nrx = IBLND_RX_MSGS(version); + atomic_set(&conn->ibc_refcount, 1 + IBLND_RX_MSGS(conn)); + conn->ibc_nrx = IBLND_RX_MSGS(conn); /* post receives */ - for (i = 0; i < IBLND_RX_MSGS(version); i++) { + for (i = 0; i < IBLND_RX_MSGS(conn); i++) { rc = kiblnd_post_rx(&conn->ibc_rxs[i], IBLND_POSTRX_NO_CREDIT); - if (rc != 0) { + if (rc) { CERROR("Can't post rxmsg: %d\n", rc); /* Make posted receives complete */ kiblnd_abort_receives(conn); - /* correct # of posted buffers - * NB locking needed now I'm racing with completion */ + /* + * correct # of posted buffers + * NB locking needed now I'm racing with completion + */ spin_lock_irqsave(&sched->ibs_lock, flags); - conn->ibc_nrx -= IBLND_RX_MSGS(version) - i; + conn->ibc_nrx -= IBLND_RX_MSGS(conn) - i; spin_unlock_irqrestore(&sched->ibs_lock, flags); - /* cmid will be destroyed by CM(ofed) after cm_callback + /* + * cmid will be destroyed by CM(ofed) after cm_callback * returned, so we can't refer it anymore - * (by kiblnd_connd()->kiblnd_destroy_conn) */ + * (by kiblnd_connd()->kiblnd_destroy_conn) + */ rdma_destroy_qp(conn->ibc_cmid); conn->ibc_cmid = NULL; /* Drop my own and unused rxbuffer refcounts */ - while (i++ <= IBLND_RX_MSGS(version)) + while (i++ <= IBLND_RX_MSGS(conn)) kiblnd_conn_decref(conn); return NULL; @@ -822,7 +818,7 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid, /* Init successful! */ LASSERT(state == IBLND_CONN_ACTIVE_CONNECT || - state == IBLND_CONN_PASSIVE_WAIT); + state == IBLND_CONN_PASSIVE_WAIT); conn->ibc_state = state; /* 1 more conn */ @@ -830,29 +826,29 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid, return conn; failed_2: - kiblnd_destroy_conn(conn); + kiblnd_destroy_conn(conn, true); failed_1: LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr)); failed_0: return NULL; } -void kiblnd_destroy_conn(kib_conn_t *conn) +void kiblnd_destroy_conn(kib_conn_t *conn, bool free_conn) { struct rdma_cm_id *cmid = conn->ibc_cmid; kib_peer_t *peer = conn->ibc_peer; int rc; LASSERT(!in_interrupt()); - LASSERT(atomic_read(&conn->ibc_refcount) == 0); + LASSERT(!atomic_read(&conn->ibc_refcount)); LASSERT(list_empty(&conn->ibc_early_rxs)); LASSERT(list_empty(&conn->ibc_tx_noops)); LASSERT(list_empty(&conn->ibc_tx_queue)); LASSERT(list_empty(&conn->ibc_tx_queue_rsrvd)); LASSERT(list_empty(&conn->ibc_tx_queue_nocred)); LASSERT(list_empty(&conn->ibc_active_txs)); - LASSERT(conn->ibc_noops_posted == 0); - LASSERT(conn->ibc_nsends_posted == 0); + LASSERT(!conn->ibc_noops_posted); + LASSERT(!conn->ibc_nsends_posted); switch (conn->ibc_state) { default: @@ -861,7 +857,7 @@ void kiblnd_destroy_conn(kib_conn_t *conn) case IBLND_CONN_DISCONNECTED: /* connvars should have been freed already */ - LASSERT(conn->ibc_connvars == NULL); + LASSERT(!conn->ibc_connvars); break; case IBLND_CONN_INIT: @@ -869,28 +865,27 @@ void kiblnd_destroy_conn(kib_conn_t *conn) } /* conn->ibc_cmid might be destroyed by CM already */ - if (cmid != NULL && cmid->qp != NULL) + if (cmid && cmid->qp) rdma_destroy_qp(cmid); - if (conn->ibc_cq != NULL) { + if (conn->ibc_cq) { rc = ib_destroy_cq(conn->ibc_cq); - if (rc != 0) + if (rc) CWARN("Error destroying CQ: %d\n", rc); } - if (conn->ibc_rx_pages != NULL) + if (conn->ibc_rx_pages) kiblnd_unmap_rx_descs(conn); - if 
(conn->ibc_rxs != NULL) { + if (conn->ibc_rxs) { LIBCFS_FREE(conn->ibc_rxs, - IBLND_RX_MSGS(conn->ibc_version) - * sizeof(kib_rx_t)); + IBLND_RX_MSGS(conn) * sizeof(kib_rx_t)); } - if (conn->ibc_connvars != NULL) + if (conn->ibc_connvars) LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars)); - if (conn->ibc_hdev != NULL) + if (conn->ibc_hdev) kiblnd_hdev_decref(conn->ibc_hdev); /* See CAVEAT EMPTOR above in kiblnd_create_conn */ @@ -927,7 +922,7 @@ int kiblnd_close_peer_conns_locked(kib_peer_t *peer, int why) } int kiblnd_close_stale_conns_locked(kib_peer_t *peer, - int version, __u64 incarnation) + int version, __u64 incarnation) { kib_conn_t *conn; struct list_head *ctmp; @@ -967,20 +962,18 @@ static int kiblnd_close_matching_conns(lnet_ni_t *ni, lnet_nid_t nid) write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - if (nid != LNET_NID_ANY) - lo = hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; - else { + if (nid != LNET_NID_ANY) { + lo = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; + hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; + } else { lo = 0; hi = kiblnd_data.kib_peer_hash_size - 1; } for (i = lo; i <= hi; i++) { list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) { - peer = list_entry(ptmp, kib_peer_t, ibp_list); - LASSERT(peer->ibp_connecting > 0 || - peer->ibp_accepting > 0 || - !list_empty(&peer->ibp_conns)); + LASSERT(!kiblnd_peer_idle(peer)); if (peer->ibp_ni != ni) continue; @@ -998,10 +991,10 @@ static int kiblnd_close_matching_conns(lnet_ni_t *ni, lnet_nid_t nid) if (nid == LNET_NID_ANY) return 0; - return (count == 0) ? -ENOENT : 0; + return !count ? -ENOENT : 0; } -int kiblnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) +static int kiblnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) { struct libcfs_ioctl_data *data = arg; int rc = -EINVAL; @@ -1027,14 +1020,14 @@ int kiblnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) rc = 0; conn = kiblnd_get_conn_by_idx(ni, data->ioc_count); - if (conn == NULL) { + if (!conn) { rc = -ENOENT; break; } - LASSERT(conn->ibc_cmid != NULL); + LASSERT(conn->ibc_cmid); data->ioc_nid = conn->ibc_peer->ibp_nid; - if (conn->ibc_cmid->route.path_rec == NULL) + if (!conn->ibc_cmid->route.path_rec) data->ioc_u32[0] = 0; /* iWarp has no path MTU */ else data->ioc_u32[0] = @@ -1054,7 +1047,7 @@ int kiblnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) return rc; } -void kiblnd_query(lnet_ni_t *ni, lnet_nid_t nid, unsigned long *when) +static void kiblnd_query(lnet_ni_t *ni, lnet_nid_t nid, unsigned long *when) { unsigned long last_alive = 0; unsigned long now = cfs_time_current(); @@ -1065,21 +1058,19 @@ void kiblnd_query(lnet_ni_t *ni, lnet_nid_t nid, unsigned long *when) read_lock_irqsave(glock, flags); peer = kiblnd_find_peer_locked(nid); - if (peer != NULL) { - LASSERT(peer->ibp_connecting > 0 || /* creating conns */ - peer->ibp_accepting > 0 || - !list_empty(&peer->ibp_conns)); /* active conn */ + if (peer) last_alive = peer->ibp_last_alive; - } read_unlock_irqrestore(glock, flags); - if (last_alive != 0) + if (last_alive) *when = last_alive; - /* peer is not persistent in hash, trigger peer creation - * and connection establishment with a NULL tx */ - if (peer == NULL) + /* + * peer is not persistent in hash, trigger peer creation + * and connection establishment with a NULL tx + */ + if (!peer) kiblnd_launch_tx(ni, NULL, nid); CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago\n", @@ -1087,13 +1078,13 @@ void kiblnd_query(lnet_ni_t *ni, lnet_nid_t nid, unsigned long *when) last_alive ? 
cfs_duration_sec(now - last_alive) : -1); } -void kiblnd_free_pages(kib_pages_t *p) +static void kiblnd_free_pages(kib_pages_t *p) { int npages = p->ibp_npages; int i; for (i = 0; i < npages; i++) { - if (p->ibp_pages[i] != NULL) + if (p->ibp_pages[i]) __free_page(p->ibp_pages[i]); } @@ -1107,7 +1098,7 @@ int kiblnd_alloc_pages(kib_pages_t **pp, int cpt, int npages) LIBCFS_CPT_ALLOC(p, lnet_cpt_table(), cpt, offsetof(kib_pages_t, ibp_pages[npages])); - if (p == NULL) { + if (!p) { CERROR("Can't allocate descriptor for %d pages\n", npages); return -ENOMEM; } @@ -1119,7 +1110,7 @@ int kiblnd_alloc_pages(kib_pages_t **pp, int cpt, int npages) p->ibp_pages[i] = alloc_pages_node( cfs_cpt_spread_node(lnet_cpt_table(), cpt), GFP_NOFS, 0); - if (p->ibp_pages[i] == NULL) { + if (!p->ibp_pages[i]) { CERROR("Can't allocate page %d of %d\n", i, npages); kiblnd_free_pages(p); return -ENOMEM; @@ -1135,10 +1126,10 @@ void kiblnd_unmap_rx_descs(kib_conn_t *conn) kib_rx_t *rx; int i; - LASSERT(conn->ibc_rxs != NULL); - LASSERT(conn->ibc_hdev != NULL); + LASSERT(conn->ibc_rxs); + LASSERT(conn->ibc_hdev); - for (i = 0; i < IBLND_RX_MSGS(conn->ibc_version); i++) { + for (i = 0; i < IBLND_RX_MSGS(conn); i++) { rx = &conn->ibc_rxs[i]; LASSERT(rx->rx_nob >= 0); /* not posted */ @@ -1162,7 +1153,7 @@ void kiblnd_map_rx_descs(kib_conn_t *conn) int ipg; int i; - for (pg_off = ipg = i = 0; i < IBLND_RX_MSGS(conn->ibc_version); i++) { + for (pg_off = ipg = i = 0; i < IBLND_RX_MSGS(conn); i++) { pg = conn->ibc_rx_pages->ibp_pages[ipg]; rx = &conn->ibc_rxs[i]; @@ -1174,7 +1165,7 @@ void kiblnd_map_rx_descs(kib_conn_t *conn) IBLND_MSG_SIZE, DMA_FROM_DEVICE); LASSERT(!kiblnd_dma_mapping_error(conn->ibc_hdev->ibh_ibdev, - rx->rx_msgaddr)); + rx->rx_msgaddr)); KIBLND_UNMAP_ADDR_SET(rx, rx_msgunmap, rx->rx_msgaddr); CDEBUG(D_NET, "rx %d: %p %#llx(%#llx)\n", @@ -1187,7 +1178,7 @@ void kiblnd_map_rx_descs(kib_conn_t *conn) if (pg_off == PAGE_SIZE) { pg_off = 0; ipg++; - LASSERT(ipg <= IBLND_RX_MSG_PAGES(conn->ibc_version)); + LASSERT(ipg <= IBLND_RX_MSG_PAGES(conn)); } } } @@ -1198,9 +1189,9 @@ static void kiblnd_unmap_tx_pool(kib_tx_pool_t *tpo) kib_tx_t *tx; int i; - LASSERT(tpo->tpo_pool.po_allocated == 0); + LASSERT(!tpo->tpo_pool.po_allocated); - if (hdev == NULL) + if (!hdev) return; for (i = 0; i < tpo->tpo_pool.po_size; i++) { @@ -1224,9 +1215,10 @@ static kib_hca_dev_t *kiblnd_current_hdev(kib_dev_t *dev) read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); while (dev->ibd_failover) { read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - if (i++ % 50 == 0) + if (!(i++ % 50)) CDEBUG(D_NET, "%s: Wait for failover\n", dev->ibd_ifname); + set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(cfs_time_seconds(1) / 100); read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); @@ -1252,7 +1244,7 @@ static void kiblnd_map_tx_pool(kib_tx_pool_t *tpo) int ipage; int i; - LASSERT(net != NULL); + LASSERT(net); dev = net->ibn_dev; @@ -1260,7 +1252,7 @@ static void kiblnd_map_tx_pool(kib_tx_pool_t *tpo) CLASSERT(IBLND_MSG_SIZE <= PAGE_SIZE); /* No fancy arithmetic when we do the buffer calculations */ - CLASSERT(PAGE_SIZE % IBLND_MSG_SIZE == 0); + CLASSERT(!(PAGE_SIZE % IBLND_MSG_SIZE)); tpo->tpo_hdev = kiblnd_current_hdev(dev); @@ -1275,7 +1267,7 @@ static void kiblnd_map_tx_pool(kib_tx_pool_t *tpo) tpo->tpo_hdev->ibh_ibdev, tx->tx_msg, IBLND_MSG_SIZE, DMA_TO_DEVICE); LASSERT(!kiblnd_dma_mapping_error(tpo->tpo_hdev->ibh_ibdev, - tx->tx_msgaddr)); + tx->tx_msgaddr)); KIBLND_UNMAP_ADDR_SET(tx, tx_msgunmap, 
tx->tx_msgaddr); list_add(&tx->tx_list, &pool->po_free_list); @@ -1291,68 +1283,32 @@ static void kiblnd_map_tx_pool(kib_tx_pool_t *tpo) } } -struct ib_mr *kiblnd_find_dma_mr(kib_hca_dev_t *hdev, __u64 addr, __u64 size) +struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev, kib_rdma_desc_t *rd, + int negotiated_nfrags) { - __u64 index; - - LASSERT(hdev->ibh_mrs[0] != NULL); - - if (hdev->ibh_nmrs == 1) - return hdev->ibh_mrs[0]; - - index = addr >> hdev->ibh_mr_shift; + __u16 nfrags = (negotiated_nfrags != -1) ? + negotiated_nfrags : *kiblnd_tunables.kib_map_on_demand; - if (index < hdev->ibh_nmrs && - index == ((addr + size - 1) >> hdev->ibh_mr_shift)) - return hdev->ibh_mrs[index]; - - return NULL; -} - -struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev, kib_rdma_desc_t *rd) -{ - struct ib_mr *prev_mr; - struct ib_mr *mr; - int i; - - LASSERT(hdev->ibh_mrs[0] != NULL); + LASSERT(hdev->ibh_mrs); if (*kiblnd_tunables.kib_map_on_demand > 0 && - *kiblnd_tunables.kib_map_on_demand <= rd->rd_nfrags) + nfrags <= rd->rd_nfrags) return NULL; - if (hdev->ibh_nmrs == 1) - return hdev->ibh_mrs[0]; - - for (i = 0, mr = prev_mr = NULL; - i < rd->rd_nfrags; i++) { - mr = kiblnd_find_dma_mr(hdev, - rd->rd_frags[i].rf_addr, - rd->rd_frags[i].rf_nob); - if (prev_mr == NULL) - prev_mr = mr; - - if (mr == NULL || prev_mr != mr) { - /* Can't covered by one single MR */ - mr = NULL; - break; - } - } - - return mr; + return hdev->ibh_mrs; } static void kiblnd_destroy_fmr_pool(kib_fmr_pool_t *pool) { - LASSERT(pool->fpo_map_count == 0); + LASSERT(!pool->fpo_map_count); - if (pool->fpo_fmr_pool != NULL) + if (pool->fpo_fmr_pool) ib_destroy_fmr_pool(pool->fpo_fmr_pool); - if (pool->fpo_hdev != NULL) + if (pool->fpo_hdev) kiblnd_hdev_decref(pool->fpo_hdev); - LIBCFS_FREE(pool, sizeof(kib_fmr_pool_t)); + LIBCFS_FREE(pool, sizeof(*pool)); } static void kiblnd_destroy_fmr_pool_list(struct list_head *head) @@ -1387,7 +1343,7 @@ static int kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps, kib_dev_t *dev = fps->fps_net->ibn_dev; kib_fmr_pool_t *fpo; struct ib_fmr_pool_param param = { - .max_pages_per_fmr = LNET_MAX_PAYLOAD/PAGE_SIZE, + .max_pages_per_fmr = LNET_MAX_PAYLOAD / PAGE_SIZE, .page_shift = PAGE_SHIFT, .access = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE), @@ -1399,7 +1355,7 @@ static int kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps, int rc; LIBCFS_CPT_ALLOC(fpo, lnet_cpt_table(), fps->fps_cpt, sizeof(*fpo)); - if (fpo == NULL) + if (!fpo) return -ENOMEM; fpo->fpo_hdev = kiblnd_current_hdev(dev); @@ -1410,7 +1366,7 @@ static int kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps, CERROR("Failed to create FMR pool: %d\n", rc); kiblnd_hdev_decref(fpo->fpo_hdev); - LIBCFS_FREE(fpo, sizeof(kib_fmr_pool_t)); + LIBCFS_FREE(fpo, sizeof(*fpo)); return rc; } @@ -1424,7 +1380,7 @@ static int kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps, static void kiblnd_fail_fmr_poolset(kib_fmr_poolset_t *fps, struct list_head *zombies) { - if (fps->fps_net == NULL) /* intialized? */ + if (!fps->fps_net) /* intialized? 
*/ return; spin_lock(&fps->fps_lock); @@ -1434,7 +1390,7 @@ static void kiblnd_fail_fmr_poolset(kib_fmr_poolset_t *fps, kib_fmr_pool_t, fpo_list); fpo->fpo_failed = 1; list_del(&fpo->fpo_list); - if (fpo->fpo_map_count == 0) + if (!fpo->fpo_map_count) list_add(&fpo->fpo_list, zombies); else list_add(&fpo->fpo_list, &fps->fps_failed_pool_list); @@ -1445,7 +1401,7 @@ static void kiblnd_fail_fmr_poolset(kib_fmr_poolset_t *fps, static void kiblnd_fini_fmr_poolset(kib_fmr_poolset_t *fps) { - if (fps->fps_net != NULL) { /* initialized? */ + if (fps->fps_net) { /* initialized? */ kiblnd_destroy_fmr_pool_list(&fps->fps_failed_pool_list); kiblnd_destroy_fmr_pool_list(&fps->fps_pool_list); } @@ -1458,7 +1414,7 @@ static int kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, kib_fmr_pool_t *fpo; int rc; - memset(fps, 0, sizeof(kib_fmr_poolset_t)); + memset(fps, 0, sizeof(*fps)); fps->fps_net = net; fps->fps_cpt = cpt; @@ -1469,7 +1425,7 @@ static int kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, INIT_LIST_HEAD(&fps->fps_failed_pool_list); rc = kiblnd_create_fmr_pool(fps, &fpo); - if (rc == 0) + if (!rc) list_add_tail(&fpo->fpo_list, &fps->fps_pool_list); return rc; @@ -1477,7 +1433,7 @@ static int kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, static int kiblnd_fmr_pool_is_idle(kib_fmr_pool_t *fpo, unsigned long now) { - if (fpo->fpo_map_count != 0) /* still in use */ + if (fpo->fpo_map_count) /* still in use */ return 0; if (fpo->fpo_failed) return 1; @@ -1494,11 +1450,11 @@ void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status) int rc; rc = ib_fmr_pool_unmap(fmr->fmr_pfmr); - LASSERT(rc == 0); + LASSERT(!rc); - if (status != 0) { + if (status) { rc = ib_flush_fmr_pool(fpo->fpo_fmr_pool); - LASSERT(rc == 0); + LASSERT(!rc); } fmr->fmr_pool = NULL; @@ -1563,11 +1519,9 @@ int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages, if (fps->fps_increasing) { spin_unlock(&fps->fps_lock); - CDEBUG(D_NET, - "Another thread is allocating new FMR pool, waiting for her to complete\n"); + CDEBUG(D_NET, "Another thread is allocating new FMR pool, waiting for her to complete\n"); schedule(); goto again; - } if (time_before(cfs_time_current(), fps->fps_next_retry)) { @@ -1583,7 +1537,7 @@ int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages, rc = kiblnd_create_fmr_pool(fps, &fpo); spin_lock(&fps->fps_lock); fps->fps_increasing = 0; - if (rc == 0) { + if (!rc) { fps->fps_version++; list_add_tail(&fpo->fpo_list, &fps->fps_pool_list); } else { @@ -1597,7 +1551,7 @@ int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages, static void kiblnd_fini_pool(kib_pool_t *pool) { LASSERT(list_empty(&pool->po_free_list)); - LASSERT(pool->po_allocated == 0); + LASSERT(!pool->po_allocated); CDEBUG(D_NET, "Finalize %s pool\n", pool->po_owner->ps_name); } @@ -1606,7 +1560,7 @@ static void kiblnd_init_pool(kib_poolset_t *ps, kib_pool_t *pool, int size) { CDEBUG(D_NET, "Initialize %s pool\n", ps->ps_name); - memset(pool, 0, sizeof(kib_pool_t)); + memset(pool, 0, sizeof(*pool)); INIT_LIST_HEAD(&pool->po_free_list); pool->po_deadline = cfs_time_shift(IBLND_POOL_DEADLINE); pool->po_owner = ps; @@ -1621,14 +1575,14 @@ static void kiblnd_destroy_pool_list(struct list_head *head) pool = list_entry(head->next, kib_pool_t, po_list); list_del(&pool->po_list); - LASSERT(pool->po_owner != NULL); + LASSERT(pool->po_owner); pool->po_owner->ps_pool_destroy(pool); } } static void kiblnd_fail_poolset(kib_poolset_t *ps, struct list_head *zombies) { - if (ps->ps_net == 
NULL) /* intialized? */ + if (!ps->ps_net) /* intialized? */ return; spin_lock(&ps->ps_lock); @@ -1637,7 +1591,7 @@ static void kiblnd_fail_poolset(kib_poolset_t *ps, struct list_head *zombies) kib_pool_t, po_list); po->po_failed = 1; list_del(&po->po_list); - if (po->po_allocated == 0) + if (!po->po_allocated) list_add(&po->po_list, zombies); else list_add(&po->po_list, &ps->ps_failed_pool_list); @@ -1647,7 +1601,7 @@ static void kiblnd_fail_poolset(kib_poolset_t *ps, struct list_head *zombies) static void kiblnd_fini_poolset(kib_poolset_t *ps) { - if (ps->ps_net != NULL) { /* initialized? */ + if (ps->ps_net) { /* initialized? */ kiblnd_destroy_pool_list(&ps->ps_failed_pool_list); kiblnd_destroy_pool_list(&ps->ps_pool_list); } @@ -1663,7 +1617,7 @@ static int kiblnd_init_poolset(kib_poolset_t *ps, int cpt, kib_pool_t *pool; int rc; - memset(ps, 0, sizeof(kib_poolset_t)); + memset(ps, 0, sizeof(*ps)); ps->ps_cpt = cpt; ps->ps_net = net; @@ -1680,7 +1634,7 @@ static int kiblnd_init_poolset(kib_poolset_t *ps, int cpt, INIT_LIST_HEAD(&ps->ps_failed_pool_list); rc = ps->ps_pool_create(ps, size, &pool); - if (rc == 0) + if (!rc) list_add(&pool->po_list, &ps->ps_pool_list); else CERROR("Failed to create the first pool for %s\n", ps->ps_name); @@ -1690,7 +1644,7 @@ static int kiblnd_init_poolset(kib_poolset_t *ps, int cpt, static int kiblnd_pool_is_idle(kib_pool_t *pool, unsigned long now) { - if (pool->po_allocated != 0) /* still in use */ + if (pool->po_allocated) /* still in use */ return 0; if (pool->po_failed) return 1; @@ -1706,7 +1660,7 @@ void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node) spin_lock(&ps->ps_lock); - if (ps->ps_node_fini != NULL) + if (ps->ps_node_fini) ps->ps_node_fini(pool, node); LASSERT(pool->po_allocated > 0); @@ -1731,6 +1685,9 @@ struct list_head *kiblnd_pool_alloc_node(kib_poolset_t *ps) { struct list_head *node; kib_pool_t *pool; + unsigned int interval = 1; + unsigned long time_before; + unsigned int trips = 0; int rc; again: @@ -1744,7 +1701,7 @@ struct list_head *kiblnd_pool_alloc_node(kib_poolset_t *ps) node = pool->po_free_list.next; list_del(node); - if (ps->ps_node_init != NULL) { + if (ps->ps_node_init) { /* still hold the lock */ ps->ps_node_init(pool, node); } @@ -1756,9 +1713,15 @@ struct list_head *kiblnd_pool_alloc_node(kib_poolset_t *ps) if (ps->ps_increasing) { /* another thread is allocating a new pool */ spin_unlock(&ps->ps_lock); - CDEBUG(D_NET, "Another thread is allocating new %s pool, waiting for her to complete\n", - ps->ps_name); - schedule(); + trips++; + CDEBUG(D_NET, "Another thread is allocating new %s pool, waiting %d HZs for her to complete. 
trips = %d\n", + ps->ps_name, interval, trips); + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(interval); + if (interval < cfs_time_seconds(1)) + interval *= 2; + goto again; } @@ -1772,12 +1735,14 @@ struct list_head *kiblnd_pool_alloc_node(kib_poolset_t *ps) spin_unlock(&ps->ps_lock); CDEBUG(D_NET, "%s pool exhausted, allocate new pool\n", ps->ps_name); - + time_before = cfs_time_current(); rc = ps->ps_pool_create(ps, ps->ps_pool_size, &pool); + CDEBUG(D_NET, "ps_pool_create took %lu HZ to complete", + cfs_time_current() - time_before); spin_lock(&ps->ps_lock); ps->ps_increasing = 0; - if (rc == 0) { + if (!rc) { list_add_tail(&pool->po_list, &ps->ps_pool_list); } else { ps->ps_next_retry = cfs_time_shift(IBLND_POOL_RETRY); @@ -1794,37 +1759,37 @@ static void kiblnd_destroy_tx_pool(kib_pool_t *pool) kib_tx_pool_t *tpo = container_of(pool, kib_tx_pool_t, tpo_pool); int i; - LASSERT(pool->po_allocated == 0); + LASSERT(!pool->po_allocated); - if (tpo->tpo_tx_pages != NULL) { + if (tpo->tpo_tx_pages) { kiblnd_unmap_tx_pool(tpo); kiblnd_free_pages(tpo->tpo_tx_pages); } - if (tpo->tpo_tx_descs == NULL) + if (!tpo->tpo_tx_descs) goto out; for (i = 0; i < pool->po_size; i++) { kib_tx_t *tx = &tpo->tpo_tx_descs[i]; list_del(&tx->tx_list); - if (tx->tx_pages != NULL) + if (tx->tx_pages) LIBCFS_FREE(tx->tx_pages, LNET_MAX_IOV * sizeof(*tx->tx_pages)); - if (tx->tx_frags != NULL) + if (tx->tx_frags) LIBCFS_FREE(tx->tx_frags, IBLND_MAX_RDMA_FRAGS * sizeof(*tx->tx_frags)); - if (tx->tx_wrq != NULL) + if (tx->tx_wrq) LIBCFS_FREE(tx->tx_wrq, (1 + IBLND_MAX_RDMA_FRAGS) * sizeof(*tx->tx_wrq)); - if (tx->tx_sge != NULL) + if (tx->tx_sge) LIBCFS_FREE(tx->tx_sge, (1 + IBLND_MAX_RDMA_FRAGS) * sizeof(*tx->tx_sge)); - if (tx->tx_rd != NULL) + if (tx->tx_rd) LIBCFS_FREE(tx->tx_rd, offsetof(kib_rdma_desc_t, rd_frags[IBLND_MAX_RDMA_FRAGS])); @@ -1834,7 +1799,7 @@ static void kiblnd_destroy_tx_pool(kib_pool_t *pool) pool->po_size * sizeof(kib_tx_t)); out: kiblnd_fini_pool(pool); - LIBCFS_FREE(tpo, sizeof(kib_tx_pool_t)); + LIBCFS_FREE(tpo, sizeof(*tpo)); } static int kiblnd_tx_pool_size(int ncpts) @@ -1853,7 +1818,7 @@ static int kiblnd_create_tx_pool(kib_poolset_t *ps, int size, kib_tx_pool_t *tpo; LIBCFS_CPT_ALLOC(tpo, lnet_cpt_table(), ps->ps_cpt, sizeof(*tpo)); - if (tpo == NULL) { + if (!tpo) { CERROR("Failed to allocate TX pool\n"); return -ENOMEM; } @@ -1864,15 +1829,15 @@ static int kiblnd_create_tx_pool(kib_poolset_t *ps, int size, tpo->tpo_tx_pages = NULL; npg = (size * IBLND_MSG_SIZE + PAGE_SIZE - 1) / PAGE_SIZE; - if (kiblnd_alloc_pages(&tpo->tpo_tx_pages, ps->ps_cpt, npg) != 0) { + if (kiblnd_alloc_pages(&tpo->tpo_tx_pages, ps->ps_cpt, npg)) { CERROR("Can't allocate tx pages: %d\n", npg); - LIBCFS_FREE(tpo, sizeof(kib_tx_pool_t)); + LIBCFS_FREE(tpo, sizeof(*tpo)); return -ENOMEM; } LIBCFS_CPT_ALLOC(tpo->tpo_tx_descs, lnet_cpt_table(), ps->ps_cpt, size * sizeof(kib_tx_t)); - if (tpo->tpo_tx_descs == NULL) { + if (!tpo->tpo_tx_descs) { CERROR("Can't allocate %d tx descriptors\n", size); ps->ps_pool_destroy(pool); return -ENOMEM; @@ -1884,17 +1849,17 @@ static int kiblnd_create_tx_pool(kib_poolset_t *ps, int size, kib_tx_t *tx = &tpo->tpo_tx_descs[i]; tx->tx_pool = tpo; - if (ps->ps_net->ibn_fmr_ps != NULL) { + if (ps->ps_net->ibn_fmr_ps) { LIBCFS_CPT_ALLOC(tx->tx_pages, lnet_cpt_table(), ps->ps_cpt, LNET_MAX_IOV * sizeof(*tx->tx_pages)); - if (tx->tx_pages == NULL) + if (!tx->tx_pages) break; } LIBCFS_CPT_ALLOC(tx->tx_frags, lnet_cpt_table(), ps->ps_cpt, IBLND_MAX_RDMA_FRAGS * 
sizeof(*tx->tx_frags)); - if (tx->tx_frags == NULL) + if (!tx->tx_frags) break; sg_init_table(tx->tx_frags, IBLND_MAX_RDMA_FRAGS); @@ -1902,19 +1867,19 @@ static int kiblnd_create_tx_pool(kib_poolset_t *ps, int size, LIBCFS_CPT_ALLOC(tx->tx_wrq, lnet_cpt_table(), ps->ps_cpt, (1 + IBLND_MAX_RDMA_FRAGS) * sizeof(*tx->tx_wrq)); - if (tx->tx_wrq == NULL) + if (!tx->tx_wrq) break; LIBCFS_CPT_ALLOC(tx->tx_sge, lnet_cpt_table(), ps->ps_cpt, (1 + IBLND_MAX_RDMA_FRAGS) * sizeof(*tx->tx_sge)); - if (tx->tx_sge == NULL) + if (!tx->tx_sge) break; LIBCFS_CPT_ALLOC(tx->tx_rd, lnet_cpt_table(), ps->ps_cpt, offsetof(kib_rdma_desc_t, rd_frags[IBLND_MAX_RDMA_FRAGS])); - if (tx->tx_rd == NULL) + if (!tx->tx_rd) break; } @@ -1945,23 +1910,23 @@ static void kiblnd_net_fini_pools(kib_net_t *net) kib_tx_poolset_t *tps; kib_fmr_poolset_t *fps; - if (net->ibn_tx_ps != NULL) { + if (net->ibn_tx_ps) { tps = net->ibn_tx_ps[i]; kiblnd_fini_poolset(&tps->tps_poolset); } - if (net->ibn_fmr_ps != NULL) { + if (net->ibn_fmr_ps) { fps = net->ibn_fmr_ps[i]; kiblnd_fini_fmr_poolset(fps); } } - if (net->ibn_tx_ps != NULL) { + if (net->ibn_tx_ps) { cfs_percpt_free(net->ibn_tx_ps); net->ibn_tx_ps = NULL; } - if (net->ibn_fmr_ps != NULL) { + if (net->ibn_fmr_ps) { cfs_percpt_free(net->ibn_fmr_ps); net->ibn_fmr_ps = NULL; } @@ -1975,8 +1940,7 @@ static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts) int i; read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - if (*kiblnd_tunables.kib_map_on_demand == 0 && - net->ibn_dev->ibd_hdev->ibh_nmrs == 1) { + if (!*kiblnd_tunables.kib_map_on_demand) { read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); goto create_tx_pool; } @@ -1996,7 +1960,7 @@ static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts) * TX pool must be created later than FMR, see LU-2268 * for details */ - LASSERT(net->ibn_tx_ps == NULL); + LASSERT(!net->ibn_tx_ps); /* * premapping can fail if ibd_nmr > 1, so we always create @@ -2005,56 +1969,45 @@ static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts) net->ibn_fmr_ps = cfs_percpt_alloc(lnet_cpt_table(), sizeof(kib_fmr_poolset_t)); - if (net->ibn_fmr_ps == NULL) { + if (!net->ibn_fmr_ps) { CERROR("Failed to allocate FMR pool array\n"); rc = -ENOMEM; goto failed; } for (i = 0; i < ncpts; i++) { - cpt = (cpts == NULL) ? i : cpts[i]; + cpt = !cpts ? i : cpts[i]; rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, net, kiblnd_fmr_pool_size(ncpts), kiblnd_fmr_flush_trigger(ncpts)); - if (rc == -ENOSYS && i == 0) /* no FMR */ - break; - - if (rc != 0) { /* a real error */ + if (rc) { CERROR("Can't initialize FMR pool for CPT %d: %d\n", cpt, rc); goto failed; } } - if (i > 0) { + if (i > 0) LASSERT(i == ncpts); - goto create_tx_pool; - } - - cfs_percpt_free(net->ibn_fmr_ps); - net->ibn_fmr_ps = NULL; - - CWARN("Device does not support FMR\n"); - goto failed; create_tx_pool: net->ibn_tx_ps = cfs_percpt_alloc(lnet_cpt_table(), sizeof(kib_tx_poolset_t)); - if (net->ibn_tx_ps == NULL) { + if (!net->ibn_tx_ps) { CERROR("Failed to allocate tx pool array\n"); rc = -ENOMEM; goto failed; } for (i = 0; i < ncpts; i++) { - cpt = (cpts == NULL) ? i : cpts[i]; + cpt = !cpts ? 
i : cpts[i]; rc = kiblnd_init_poolset(&net->ibn_tx_ps[cpt]->tps_poolset, cpt, net, "TX", kiblnd_tx_pool_size(ncpts), kiblnd_create_tx_pool, kiblnd_destroy_tx_pool, kiblnd_tx_init, NULL); - if (rc != 0) { + if (rc) { CERROR("Can't initialize TX pool for CPT %d: %d\n", cpt, rc); goto failed; @@ -2064,14 +2017,16 @@ static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts) return 0; failed: kiblnd_net_fini_pools(net); - LASSERT(rc != 0); + LASSERT(rc); return rc; } static int kiblnd_hdev_get_attr(kib_hca_dev_t *hdev) { - /* It's safe to assume a HCA can handle a page size - * matching that of the native system */ + /* + * It's safe to assume a HCA can handle a page size + * matching that of the native system + */ hdev->ibh_page_shift = PAGE_SHIFT; hdev->ibh_page_size = 1 << PAGE_SHIFT; hdev->ibh_page_mask = ~((__u64)hdev->ibh_page_size - 1); @@ -2082,44 +2037,28 @@ static int kiblnd_hdev_get_attr(kib_hca_dev_t *hdev) return 0; } - for (hdev->ibh_mr_shift = 0; - hdev->ibh_mr_shift < 64; hdev->ibh_mr_shift++) { - if (hdev->ibh_mr_size == (1ULL << hdev->ibh_mr_shift) || - hdev->ibh_mr_size == (1ULL << hdev->ibh_mr_shift) - 1) - return 0; - } - CERROR("Invalid mr size: %#llx\n", hdev->ibh_mr_size); return -EINVAL; } static void kiblnd_hdev_cleanup_mrs(kib_hca_dev_t *hdev) { - int i; - - if (hdev->ibh_nmrs == 0 || hdev->ibh_mrs == NULL) + if (!hdev->ibh_mrs) return; - for (i = 0; i < hdev->ibh_nmrs; i++) { - if (hdev->ibh_mrs[i] == NULL) - break; + ib_dereg_mr(hdev->ibh_mrs); - ib_dereg_mr(hdev->ibh_mrs[i]); - } - - LIBCFS_FREE(hdev->ibh_mrs, sizeof(*hdev->ibh_mrs) * hdev->ibh_nmrs); - hdev->ibh_mrs = NULL; - hdev->ibh_nmrs = 0; + hdev->ibh_mrs = NULL; } void kiblnd_hdev_destroy(kib_hca_dev_t *hdev) { kiblnd_hdev_cleanup_mrs(hdev); - if (hdev->ibh_pd != NULL) + if (hdev->ibh_pd) ib_dealloc_pd(hdev->ibh_pd); - if (hdev->ibh_cmid != NULL) + if (hdev->ibh_cmid) rdma_destroy_id(hdev->ibh_cmid); LIBCFS_FREE(hdev, sizeof(*hdev)); @@ -2132,18 +2071,9 @@ static int kiblnd_hdev_setup_mrs(kib_hca_dev_t *hdev) int acflags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE; rc = kiblnd_hdev_get_attr(hdev); - if (rc != 0) + if (rc) return rc; - LIBCFS_ALLOC(hdev->ibh_mrs, 1 * sizeof(*hdev->ibh_mrs)); - if (hdev->ibh_mrs == NULL) { - CERROR("Failed to allocate MRs table\n"); - return -ENOMEM; - } - - hdev->ibh_mrs[0] = NULL; - hdev->ibh_nmrs = 1; - mr = ib_get_dma_mr(hdev->ibh_pd, acflags); if (IS_ERR(mr)) { CERROR("Failed ib_get_dma_mr : %ld\n", PTR_ERR(mr)); @@ -2151,7 +2081,7 @@ static int kiblnd_hdev_setup_mrs(kib_hca_dev_t *hdev) return PTR_ERR(mr); } - hdev->ibh_mrs[0] = mr; + hdev->ibh_mrs = mr; return 0; } @@ -2170,12 +2100,13 @@ static int kiblnd_dev_need_failover(kib_dev_t *dev) struct sockaddr_in dstaddr; int rc; - if (dev->ibd_hdev == NULL || /* initializing */ - dev->ibd_hdev->ibh_cmid == NULL || /* listener is dead */ + if (!dev->ibd_hdev || /* initializing */ + !dev->ibd_hdev->ibh_cmid || /* listener is dead */ *kiblnd_tunables.kib_dev_failover > 1) /* debugging */ return 1; - /* XXX: it's UGLY, but I don't have better way to find + /* + * XXX: it's UGLY, but I don't have better way to find * ib-bonding HCA failover because: * * a. no reliable CM event for HCA failover... @@ -2184,7 +2115,8 @@ static int kiblnd_dev_need_failover(kib_dev_t *dev) * We have only two choices at this point: * * a. rdma_bind_addr(), it will conflict with listener cmid - * b. rdma_resolve_addr() to zero addr */ + * b. 
rdma_resolve_addr() to zero addr + */ cmid = kiblnd_rdma_create_id(kiblnd_dummy_callback, dev, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cmid)) { @@ -2201,7 +2133,7 @@ static int kiblnd_dev_need_failover(kib_dev_t *dev) dstaddr.sin_family = AF_INET; rc = rdma_resolve_addr(cmid, (struct sockaddr *)&srcaddr, (struct sockaddr *)&dstaddr, 1); - if (rc != 0 || cmid->device == NULL) { + if (rc || !cmid->device) { CERROR("Failed to bind %s:%pI4h to device(%p): %d\n", dev->ibd_ifname, &dev->ibd_ifip, cmid->device, rc); @@ -2230,24 +2162,27 @@ int kiblnd_dev_failover(kib_dev_t *dev) int i; LASSERT(*kiblnd_tunables.kib_dev_failover > 1 || - dev->ibd_can_failover || - dev->ibd_hdev == NULL); + dev->ibd_can_failover || !dev->ibd_hdev); rc = kiblnd_dev_need_failover(dev); if (rc <= 0) goto out; - if (dev->ibd_hdev != NULL && - dev->ibd_hdev->ibh_cmid != NULL) { - /* XXX it's not good to close old listener at here, + if (dev->ibd_hdev && + dev->ibd_hdev->ibh_cmid) { + /* + * XXX it's not good to close old listener at here, * because we can fail to create new listener. * But we have to close it now, otherwise rdma_bind_addr - * will return EADDRINUSE... How crap! */ + * will return EADDRINUSE... How crap! + */ write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); cmid = dev->ibd_hdev->ibh_cmid; - /* make next schedule of kiblnd_dev_need_failover() - * return 1 for me */ + /* + * make next schedule of kiblnd_dev_need_failover() + * return 1 for me + */ dev->ibd_hdev->ibh_cmid = NULL; write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); @@ -2269,7 +2204,7 @@ int kiblnd_dev_failover(kib_dev_t *dev) /* Bind to failover device or port */ rc = rdma_bind_addr(cmid, (struct sockaddr *)&addr); - if (rc != 0 || cmid->device == NULL) { + if (rc || !cmid->device) { CERROR("Failed to bind %s:%pI4h to device(%p): %d\n", dev->ibd_ifname, &dev->ibd_ifip, cmid->device, rc); @@ -2278,7 +2213,7 @@ int kiblnd_dev_failover(kib_dev_t *dev) } LIBCFS_ALLOC(hdev, sizeof(*hdev)); - if (hdev == NULL) { + if (!hdev) { CERROR("Failed to allocate kib_hca_dev\n"); rdma_destroy_id(cmid); rc = -ENOMEM; @@ -2300,13 +2235,13 @@ int kiblnd_dev_failover(kib_dev_t *dev) hdev->ibh_pd = pd; rc = rdma_listen(cmid, 0); - if (rc != 0) { + if (rc) { CERROR("Can't start new listener: %d\n", rc); goto out; } rc = kiblnd_hdev_setup_mrs(hdev); - if (rc != 0) { + if (rc) { CERROR("Can't setup device: %d\n", rc); goto out; } @@ -2334,10 +2269,10 @@ int kiblnd_dev_failover(kib_dev_t *dev) kiblnd_destroy_pool_list(&zombie_ppo); if (!list_empty(&zombie_fpo)) kiblnd_destroy_fmr_pool_list(&zombie_fpo); - if (hdev != NULL) + if (hdev) kiblnd_hdev_decref(hdev); - if (rc != 0) + if (rc) dev->ibd_failed_failover++; else dev->ibd_failed_failover = 0; @@ -2347,13 +2282,13 @@ int kiblnd_dev_failover(kib_dev_t *dev) void kiblnd_destroy_dev(kib_dev_t *dev) { - LASSERT(dev->ibd_nnets == 0); + LASSERT(!dev->ibd_nnets); LASSERT(list_empty(&dev->ibd_nets)); list_del(&dev->ibd_fail_list); list_del(&dev->ibd_list); - if (dev->ibd_hdev != NULL) + if (dev->ibd_hdev) kiblnd_hdev_decref(dev->ibd_hdev); LIBCFS_FREE(dev, sizeof(*dev)); @@ -2369,7 +2304,7 @@ static kib_dev_t *kiblnd_create_dev(char *ifname) int rc; rc = lnet_ipif_query(ifname, &up, &ip, &netmask); - if (rc != 0) { + if (rc) { CERROR("Can't query IPoIB interface %s: %d\n", ifname, rc); return NULL; @@ -2381,11 +2316,11 @@ static kib_dev_t *kiblnd_create_dev(char *ifname) } LIBCFS_ALLOC(dev, sizeof(*dev)); - if (dev == NULL) + if (!dev) return NULL; netdev = dev_get_by_name(&init_net, ifname); - if (netdev == 
NULL) { + if (!netdev) { dev->ibd_can_failover = 0; } else { dev->ibd_can_failover = !!(netdev->flags & IFF_MASTER); @@ -2400,14 +2335,13 @@ static kib_dev_t *kiblnd_create_dev(char *ifname) /* initialize the device */ rc = kiblnd_dev_failover(dev); - if (rc != 0) { + if (rc) { CERROR("Can't initialize device: %d\n", rc); LIBCFS_FREE(dev, sizeof(*dev)); return NULL; } - list_add_tail(&dev->ibd_list, - &kiblnd_data.kib_devs); + list_add_tail(&dev->ibd_list, &kiblnd_data.kib_devs); return dev; } @@ -2424,18 +2358,22 @@ static void kiblnd_base_shutdown(void) case IBLND_INIT_ALL: case IBLND_INIT_DATA: - LASSERT(kiblnd_data.kib_peers != NULL); + LASSERT(kiblnd_data.kib_peers); for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) LASSERT(list_empty(&kiblnd_data.kib_peers[i])); LASSERT(list_empty(&kiblnd_data.kib_connd_zombies)); LASSERT(list_empty(&kiblnd_data.kib_connd_conns)); + LASSERT(list_empty(&kiblnd_data.kib_reconn_list)); + LASSERT(list_empty(&kiblnd_data.kib_reconn_wait)); /* flag threads to terminate; wake and wait for them to die */ kiblnd_data.kib_shutdown = 1; - /* NB: we really want to stop scheduler threads net by net + /* + * NB: we really want to stop scheduler threads net by net * instead of the whole module, this should be improved - * with dynamic configuration LNet */ + * with dynamic configuration LNet + */ cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds) wake_up_all(&sched->ibs_waitq); @@ -2443,7 +2381,7 @@ static void kiblnd_base_shutdown(void) wake_up_all(&kiblnd_data.kib_failover_waitq); i = 2; - while (atomic_read(&kiblnd_data.kib_nthreads) != 0) { + while (atomic_read(&kiblnd_data.kib_nthreads)) { i++; /* power of 2 ? */ CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, @@ -2459,20 +2397,20 @@ static void kiblnd_base_shutdown(void) break; } - if (kiblnd_data.kib_peers != NULL) { + if (kiblnd_data.kib_peers) { LIBCFS_FREE(kiblnd_data.kib_peers, sizeof(struct list_head) * kiblnd_data.kib_peer_hash_size); } - if (kiblnd_data.kib_scheds != NULL) + if (kiblnd_data.kib_scheds) cfs_percpt_free(kiblnd_data.kib_scheds); kiblnd_data.kib_init = IBLND_INIT_NOTHING; module_put(THIS_MODULE); } -void kiblnd_shutdown(lnet_ni_t *ni) +static void kiblnd_shutdown(lnet_ni_t *ni) { kib_net_t *net = ni->ni_data; rwlock_t *g_lock = &kiblnd_data.kib_global_lock; @@ -2481,7 +2419,7 @@ void kiblnd_shutdown(lnet_ni_t *ni) LASSERT(kiblnd_data.kib_init == IBLND_INIT_ALL); - if (net == NULL) + if (!net) goto out; write_lock_irqsave(g_lock, flags); @@ -2498,7 +2436,7 @@ void kiblnd_shutdown(lnet_ni_t *ni) /* Wait for all peer state to clean up */ i = 2; - while (atomic_read(&net->ibn_npeers) != 0) { + while (atomic_read(&net->ibn_npeers)) { i++; CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n? 
*/ "%s: waiting for %d peers to disconnect\n", @@ -2519,10 +2457,9 @@ void kiblnd_shutdown(lnet_ni_t *ni) /* fall through */ case IBLND_INIT_NOTHING: - LASSERT(atomic_read(&net->ibn_nconns) == 0); + LASSERT(!atomic_read(&net->ibn_nconns)); - if (net->ibn_dev != NULL && - net->ibn_dev->ibd_nnets == 0) + if (net->ibn_dev && !net->ibn_dev->ibd_nnets) kiblnd_destroy_dev(net->ibn_dev); break; @@ -2558,7 +2495,7 @@ static int kiblnd_base_startup(void) kiblnd_data.kib_peer_hash_size = IBLND_PEER_HASH_SIZE; LIBCFS_ALLOC(kiblnd_data.kib_peers, sizeof(struct list_head) * kiblnd_data.kib_peer_hash_size); - if (kiblnd_data.kib_peers == NULL) + if (!kiblnd_data.kib_peers) goto failed; for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) INIT_LIST_HEAD(&kiblnd_data.kib_peers[i]); @@ -2566,12 +2503,15 @@ static int kiblnd_base_startup(void) spin_lock_init(&kiblnd_data.kib_connd_lock); INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns); INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies); + INIT_LIST_HEAD(&kiblnd_data.kib_reconn_list); + INIT_LIST_HEAD(&kiblnd_data.kib_reconn_wait); + init_waitqueue_head(&kiblnd_data.kib_connd_waitq); init_waitqueue_head(&kiblnd_data.kib_failover_waitq); kiblnd_data.kib_scheds = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*sched)); - if (kiblnd_data.kib_scheds == NULL) + if (!kiblnd_data.kib_scheds) goto failed; cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds) { @@ -2585,8 +2525,10 @@ static int kiblnd_base_startup(void) if (*kiblnd_tunables.kib_nscheds > 0) { nthrs = min(nthrs, *kiblnd_tunables.kib_nscheds); } else { - /* max to half of CPUs, another half is reserved for - * upper layer modules */ + /* + * max to half of CPUs, another half is reserved for + * upper layer modules + */ nthrs = min(max(IBLND_N_SCHED, nthrs >> 1), nthrs); } @@ -2601,16 +2543,16 @@ static int kiblnd_base_startup(void) /*****************************************************/ rc = kiblnd_thread_start(kiblnd_connd, NULL, "kiblnd_connd"); - if (rc != 0) { + if (rc) { CERROR("Can't spawn o2iblnd connd: %d\n", rc); goto failed; } - if (*kiblnd_tunables.kib_dev_failover != 0) + if (*kiblnd_tunables.kib_dev_failover) rc = kiblnd_thread_start(kiblnd_failover_thread, NULL, "kiblnd_failover"); - if (rc != 0) { + if (rc) { CERROR("Can't spawn o2iblnd failover thread: %d\n", rc); goto failed; } @@ -2632,7 +2574,7 @@ static int kiblnd_start_schedulers(struct kib_sched_info *sched) int nthrs; int i; - if (sched->ibs_nthreads == 0) { + if (!sched->ibs_nthreads) { if (*kiblnd_tunables.kib_nscheds > 0) { nthrs = sched->ibs_nthreads_max; } else { @@ -2655,7 +2597,7 @@ static int kiblnd_start_schedulers(struct kib_sched_info *sched) snprintf(name, sizeof(name), "kiblnd_sd_%02ld_%02ld", KIB_THREAD_CPT(id), KIB_THREAD_TID(id)); rc = kiblnd_thread_start(kiblnd_scheduler, (void *)id, name); - if (rc == 0) + if (!rc) continue; CERROR("Can't spawn thread %d for scheduler[%d]: %d\n", @@ -2677,14 +2619,14 @@ static int kiblnd_dev_start_threads(kib_dev_t *dev, int newdev, __u32 *cpts, for (i = 0; i < ncpts; i++) { struct kib_sched_info *sched; - cpt = (cpts == NULL) ? i : cpts[i]; + cpt = !cpts ? 
i : cpts[i]; sched = kiblnd_data.kib_scheds[cpt]; if (!newdev && sched->ibs_nthreads > 0) continue; rc = kiblnd_start_schedulers(kiblnd_data.kib_scheds[cpt]); - if (rc != 0) { + if (rc) { CERROR("Failed to start scheduler threads for %s\n", dev->ibd_ifname); return rc; @@ -2702,30 +2644,30 @@ static kib_dev_t *kiblnd_dev_search(char *ifname) colon = strchr(ifname, ':'); list_for_each_entry(dev, &kiblnd_data.kib_devs, ibd_list) { - if (strcmp(&dev->ibd_ifname[0], ifname) == 0) + if (!strcmp(&dev->ibd_ifname[0], ifname)) return dev; - if (alias != NULL) + if (alias) continue; colon2 = strchr(dev->ibd_ifname, ':'); - if (colon != NULL) + if (colon) *colon = 0; - if (colon2 != NULL) + if (colon2) *colon2 = 0; - if (strcmp(&dev->ibd_ifname[0], ifname) == 0) + if (!strcmp(&dev->ibd_ifname[0], ifname)) alias = dev; - if (colon != NULL) + if (colon) *colon = ':'; - if (colon2 != NULL) + if (colon2) *colon2 = ':'; } return alias; } -int kiblnd_startup(lnet_ni_t *ni) +static int kiblnd_startup(lnet_ni_t *ni) { char *ifname; kib_dev_t *ibdev = NULL; @@ -2739,13 +2681,13 @@ int kiblnd_startup(lnet_ni_t *ni) if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) { rc = kiblnd_base_startup(); - if (rc != 0) + if (rc) return rc; } LIBCFS_ALLOC(net, sizeof(*net)); ni->ni_data = net; - if (net == NULL) + if (!net) goto net_failed; ktime_get_real_ts64(&tv); @@ -2757,11 +2699,11 @@ int kiblnd_startup(lnet_ni_t *ni) ni->ni_peertxcredits = *kiblnd_tunables.kib_peertxcredits; ni->ni_peerrtrcredits = *kiblnd_tunables.kib_peerrtrcredits; - if (ni->ni_interfaces[0] != NULL) { + if (ni->ni_interfaces[0]) { /* Use the IPoIB interface specified in 'networks=' */ CLASSERT(LNET_MAX_INTERFACES > 1); - if (ni->ni_interfaces[1] != NULL) { + if (ni->ni_interfaces[1]) { CERROR("Multiple interfaces not supported\n"); goto failed; } @@ -2778,12 +2720,12 @@ int kiblnd_startup(lnet_ni_t *ni) ibdev = kiblnd_dev_search(ifname); - newdev = ibdev == NULL; + newdev = !ibdev; /* hmm...create kib_dev even for alias */ - if (ibdev == NULL || strcmp(&ibdev->ibd_ifname[0], ifname) != 0) + if (!ibdev || strcmp(&ibdev->ibd_ifname[0], ifname)) ibdev = kiblnd_create_dev(ifname); - if (ibdev == NULL) + if (!ibdev) goto failed; net->ibn_dev = ibdev; @@ -2791,11 +2733,11 @@ int kiblnd_startup(lnet_ni_t *ni) rc = kiblnd_dev_start_threads(ibdev, newdev, ni->ni_cpts, ni->ni_ncpts); - if (rc != 0) + if (rc) goto failed; rc = kiblnd_net_init_pools(net, ni->ni_cpts, ni->ni_ncpts); - if (rc != 0) { + if (rc) { CERROR("Failed to initialize NI pools: %d\n", rc); goto failed; } @@ -2810,7 +2752,7 @@ int kiblnd_startup(lnet_ni_t *ni) return 0; failed: - if (net->ibn_dev == NULL && ibdev != NULL) + if (!net->ibn_dev && ibdev) kiblnd_destroy_dev(ibdev); net_failed: @@ -2820,25 +2762,35 @@ net_failed: return -ENETDOWN; } -static void __exit kiblnd_module_fini(void) +static lnd_t the_o2iblnd = { + .lnd_type = O2IBLND, + .lnd_startup = kiblnd_startup, + .lnd_shutdown = kiblnd_shutdown, + .lnd_ctl = kiblnd_ctl, + .lnd_query = kiblnd_query, + .lnd_send = kiblnd_send, + .lnd_recv = kiblnd_recv, +}; + +static void __exit ko2iblnd_exit(void) { lnet_unregister_lnd(&the_o2iblnd); } -static int __init kiblnd_module_init(void) +static int __init ko2iblnd_init(void) { int rc; CLASSERT(sizeof(kib_msg_t) <= IBLND_MSG_SIZE); CLASSERT(offsetof(kib_msg_t, - ibm_u.get.ibgm_rd.rd_frags[IBLND_MAX_RDMA_FRAGS]) - <= IBLND_MSG_SIZE); + ibm_u.get.ibgm_rd.rd_frags[IBLND_MAX_RDMA_FRAGS]) + <= IBLND_MSG_SIZE); CLASSERT(offsetof(kib_msg_t, - 
ibm_u.putack.ibpam_rd.rd_frags[IBLND_MAX_RDMA_FRAGS]) - <= IBLND_MSG_SIZE); + ibm_u.putack.ibpam_rd.rd_frags[IBLND_MAX_RDMA_FRAGS]) + <= IBLND_MSG_SIZE); rc = kiblnd_tunables_init(); - if (rc != 0) + if (rc) return rc; lnet_register_lnd(&the_o2iblnd); @@ -2847,8 +2799,9 @@ static int __init kiblnd_module_init(void) } MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>"); -MODULE_DESCRIPTION("Kernel OpenIB gen2 LND v2.00"); +MODULE_DESCRIPTION("OpenIB gen2 LNet Network Driver"); +MODULE_VERSION("2.7.0"); MODULE_LICENSE("GPL"); -module_init(kiblnd_module_init); -module_exit(kiblnd_module_fini); +module_init(ko2iblnd_init); +module_exit(ko2iblnd_exit); diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h index 025faa9f8..bfcbdd167 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h @@ -60,17 +60,17 @@ #include <net/sock.h> #include <linux/in.h> +#include <rdma/rdma_cm.h> +#include <rdma/ib_cm.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_fmr_pool.h> + #define DEBUG_SUBSYSTEM S_LND #include "../../../include/linux/libcfs/libcfs.h" #include "../../../include/linux/lnet/lnet.h" #include "../../../include/linux/lnet/lib-lnet.h" -#include <rdma/rdma_cm.h> -#include <rdma/ib_cm.h> -#include <rdma/ib_verbs.h> -#include <rdma/ib_fmr_pool.h> - #define IBLND_PEER_HASH_SIZE 101 /* # peer lists */ /* # scheduler loops before reschedule */ #define IBLND_RESCHED 100 @@ -146,9 +146,9 @@ kiblnd_concurrent_sends_v1(void) #define IBLND_OOB_CAPABLE(v) ((v) != IBLND_MSG_VERSION_1) #define IBLND_OOB_MSGS(v) (IBLND_OOB_CAPABLE(v) ? 2 : 0) -#define IBLND_MSG_SIZE (4<<10) /* max size of queued messages (inc hdr) */ +#define IBLND_MSG_SIZE (4 << 10) /* max size of queued messages (inc hdr) */ #define IBLND_MAX_RDMA_FRAGS LNET_MAX_IOV /* max # of fragments supported */ -#define IBLND_CFG_RDMA_FRAGS (*kiblnd_tunables.kib_map_on_demand != 0 ? \ +#define IBLND_CFG_RDMA_FRAGS (*kiblnd_tunables.kib_map_on_demand ? \ *kiblnd_tunables.kib_map_on_demand : \ IBLND_MAX_RDMA_FRAGS) /* max # of fragments configured by user */ #define IBLND_RDMA_FRAGS(v) ((v) == IBLND_MSG_VERSION_1 ? 
\ @@ -162,18 +162,17 @@ kiblnd_concurrent_sends_v1(void) #define IBLND_FMR_POOL 256 #define IBLND_FMR_POOL_FLUSH 192 -/* TX messages (shared by all connections) */ -#define IBLND_TX_MSGS() (*kiblnd_tunables.kib_ntx) - -/* RX messages (per connection) */ -#define IBLND_RX_MSGS(v) (IBLND_MSG_QUEUE_SIZE(v) * 2 + IBLND_OOB_MSGS(v)) -#define IBLND_RX_MSG_BYTES(v) (IBLND_RX_MSGS(v) * IBLND_MSG_SIZE) -#define IBLND_RX_MSG_PAGES(v) ((IBLND_RX_MSG_BYTES(v) + PAGE_SIZE - 1) / PAGE_SIZE) +#define IBLND_RX_MSGS(c) \ + ((c->ibc_queue_depth) * 2 + IBLND_OOB_MSGS(c->ibc_version)) +#define IBLND_RX_MSG_BYTES(c) (IBLND_RX_MSGS(c) * IBLND_MSG_SIZE) +#define IBLND_RX_MSG_PAGES(c) \ + ((IBLND_RX_MSG_BYTES(c) + PAGE_SIZE - 1) / PAGE_SIZE) /* WRs and CQEs (per connection) */ -#define IBLND_RECV_WRS(v) IBLND_RX_MSGS(v) -#define IBLND_SEND_WRS(v) ((IBLND_RDMA_FRAGS(v) + 1) * IBLND_CONCURRENT_SENDS(v)) -#define IBLND_CQ_ENTRIES(v) (IBLND_RECV_WRS(v) + IBLND_SEND_WRS(v)) +#define IBLND_RECV_WRS(c) IBLND_RX_MSGS(c) +#define IBLND_SEND_WRS(c) \ + ((c->ibc_max_frags + 1) * IBLND_CONCURRENT_SENDS(c->ibc_version)) +#define IBLND_CQ_ENTRIES(c) (IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c)) struct kib_hca_dev; @@ -209,8 +208,7 @@ typedef struct kib_hca_dev { __u64 ibh_page_mask; /* page mask of current HCA */ int ibh_mr_shift; /* bits shift of max MR size */ __u64 ibh_mr_size; /* size of MR */ - int ibh_nmrs; /* # of global MRs */ - struct ib_mr **ibh_mrs; /* global MR */ + struct ib_mr *ibh_mrs; /* global MR */ struct ib_pd *ibh_pd; /* PD */ kib_dev_t *ibh_dev; /* owner */ atomic_t ibh_ref; /* refcount */ @@ -350,6 +348,16 @@ typedef struct { void *kib_connd; /* the connd task (serialisation assertions) */ struct list_head kib_connd_conns; /* connections to setup/teardown */ struct list_head kib_connd_zombies; /* connections with zero refcount */ + /* connections to reconnect */ + struct list_head kib_reconn_list; + /* peers wait for reconnection */ + struct list_head kib_reconn_wait; + /** + * The second that peers are pulled out from \a kib_reconn_wait + * for reconnection. 
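[Editor's note] The sizing macros above were re-keyed from the protocol version to the connection itself, so receive buffers, work requests and CQ entries now scale with the queue depth and fragment count actually negotiated per connection rather than with compile-time defaults. A minimal standalone sketch of the arithmetic, using illustrative names: depth, oob_msgs, max_frags and concurrent_sends stand in for ibc_queue_depth, IBLND_OOB_MSGS(), ibc_max_frags and IBLND_CONCURRENT_SENDS(), none of which are defined here.

#include <stdio.h>

#define MSG_SIZE (4 << 10)	/* IBLND_MSG_SIZE: 4 KiB per queued message */
#define PAGE_SZ  4096

/* two RX buffers per credit, plus the out-of-band NOOP slots */
static int rx_msgs(int depth, int oob_msgs)
{
	return depth * 2 + oob_msgs;
}

static int rx_msg_pages(int depth, int oob_msgs)
{
	int bytes = rx_msgs(depth, oob_msgs) * MSG_SIZE;

	return (bytes + PAGE_SZ - 1) / PAGE_SZ;	/* round up */
}

static int cq_entries(int depth, int oob_msgs, int max_frags,
		      int concurrent_sends)
{
	int recv_wrs = rx_msgs(depth, oob_msgs);
	int send_wrs = (max_frags + 1) * concurrent_sends;

	return recv_wrs + send_wrs;
}

int main(void)
{
	/* e.g. a negotiated depth of 8, 2 OOB slots, 256 frags, 8 sends */
	printf("rx=%d pages=%d cqes=%d\n", rx_msgs(8, 2),
	       rx_msg_pages(8, 2), cq_entries(8, 2, 256, 8));
	return 0;
}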
+ */ + time64_t kib_reconn_sec; + wait_queue_head_t kib_connd_waitq; /* connection daemon sleeps here */ spinlock_t kib_connd_lock; /* serialise */ struct ib_qp_attr kib_error_qpa; /* QP->ERROR */ @@ -465,10 +473,10 @@ typedef struct { #define IBLND_REJECT_FATAL 3 /* Anything else */ #define IBLND_REJECT_CONN_UNCOMPAT 4 /* incompatible version peer */ #define IBLND_REJECT_CONN_STALE 5 /* stale peer */ -#define IBLND_REJECT_RDMA_FRAGS 6 /* Fatal: peer's rdma frags can't match */ - /* mine */ -#define IBLND_REJECT_MSG_QUEUE_SIZE 7 /* Fatal: peer's msg queue size can't */ - /* match mine */ +/* peer's rdma frags doesn't match mine */ +#define IBLND_REJECT_RDMA_FRAGS 6 +/* peer's msg queue size doesn't match mine */ +#define IBLND_REJECT_MSG_QUEUE_SIZE 7 /***********************************************************************/ @@ -527,6 +535,8 @@ typedef struct kib_conn { struct list_head ibc_list; /* stash on peer's conn list */ struct list_head ibc_sched_list; /* schedule for attention */ __u16 ibc_version; /* version of connection */ + /* reconnect later */ + __u16 ibc_reconnect:1; __u64 ibc_incarnation; /* which instance of the peer */ atomic_t ibc_refcount; /* # users */ int ibc_state; /* what's happening */ @@ -536,6 +546,10 @@ typedef struct kib_conn { int ibc_outstanding_credits; /* # credits to return */ int ibc_reserved_credits; /* # ACK/DONE msg credits */ int ibc_comms_error; /* set on comms error */ + /* connections queue depth */ + __u16 ibc_queue_depth; + /* connections max frags */ + __u16 ibc_max_frags; unsigned int ibc_nrx:16; /* receive buffers owned */ unsigned int ibc_scheduled:1; /* scheduled for attention */ unsigned int ibc_ready:1; /* CQ callback fired */ @@ -572,18 +586,29 @@ typedef struct kib_peer { struct list_head ibp_list; /* stash on global peer list */ lnet_nid_t ibp_nid; /* who's on the other end(s) */ lnet_ni_t *ibp_ni; /* LNet interface */ - atomic_t ibp_refcount; /* # users */ struct list_head ibp_conns; /* all active connections */ struct list_head ibp_tx_queue; /* msgs waiting for a conn */ - __u16 ibp_version; /* version of peer */ __u64 ibp_incarnation; /* incarnation of peer */ - int ibp_connecting; /* current active connection attempts - */ - int ibp_accepting; /* current passive connection attempts - */ - int ibp_error; /* errno on closing this peer */ - unsigned long ibp_last_alive; /* when (in jiffies) I was last alive - */ + /* when (in jiffies) I was last alive */ + unsigned long ibp_last_alive; + /* # users */ + atomic_t ibp_refcount; + /* version of peer */ + __u16 ibp_version; + /* current passive connection attempts */ + unsigned short ibp_accepting; + /* current active connection attempts */ + unsigned short ibp_connecting; + /* reconnect this peer later */ + unsigned short ibp_reconnecting:1; + /* # consecutive reconnection attempts to this peer */ + unsigned int ibp_reconnected; + /* errno on closing this peer */ + int ibp_error; + /* max map_on_demand */ + __u16 ibp_max_frags; + /* max_peer_credits */ + __u16 ibp_queue_depth; } kib_peer_t; extern kib_data_t kiblnd_data; @@ -611,7 +636,7 @@ kiblnd_dev_can_failover(kib_dev_t *dev) if (!list_empty(&dev->ibd_fail_list)) /* already scheduled */ return 0; - if (*kiblnd_tunables.kib_dev_failover == 0) /* disabled */ + if (!*kiblnd_tunables.kib_dev_failover) /* disabled */ return 0; if (*kiblnd_tunables.kib_dev_failover > 1) /* force failover */ @@ -661,6 +686,20 @@ do { \ kiblnd_destroy_peer(peer); \ } while (0) +static inline bool +kiblnd_peer_connecting(kib_peer_t *peer) +{ + return 
peer->ibp_connecting || + peer->ibp_reconnecting || + peer->ibp_accepting; +} + +static inline bool +kiblnd_peer_idle(kib_peer_t *peer) +{ + return !kiblnd_peer_connecting(peer) && list_empty(&peer->ibp_conns); +} + static inline struct list_head * kiblnd_nid2peerlist(lnet_nid_t nid) { @@ -691,7 +730,8 @@ kiblnd_send_keepalive(kib_conn_t *conn) { return (*kiblnd_tunables.kib_keepalive > 0) && cfs_time_after(jiffies, conn->ibc_last_send + - *kiblnd_tunables.kib_keepalive*HZ); + msecs_to_jiffies(*kiblnd_tunables.kib_keepalive * + MSEC_PER_SEC)); } static inline int @@ -710,16 +750,16 @@ kiblnd_need_noop(kib_conn_t *conn) /* No tx to piggyback NOOP onto or no credit to send a tx */ return (list_empty(&conn->ibc_tx_queue) || - conn->ibc_credits == 0); + !conn->ibc_credits); } if (!list_empty(&conn->ibc_tx_noops) || /* NOOP already queued */ !list_empty(&conn->ibc_tx_queue_nocred) || /* piggyback NOOP */ - conn->ibc_credits == 0) /* no credit */ + !conn->ibc_credits) /* no credit */ return 0; if (conn->ibc_credits == 1 && /* last credit reserved for */ - conn->ibc_outstanding_credits == 0) /* giving back credits */ + !conn->ibc_outstanding_credits) /* giving back credits */ return 0; /* No tx to piggyback NOOP onto or no credit to send a tx */ @@ -755,18 +795,19 @@ kiblnd_queue2str(kib_conn_t *conn, struct list_head *q) /* CAVEAT EMPTOR: We rely on descriptor alignment to allow us to use the */ /* lowest bits of the work request id to stash the work item type. */ -#define IBLND_WID_TX 0 -#define IBLND_WID_RDMA 1 -#define IBLND_WID_RX 2 -#define IBLND_WID_MASK 3UL +#define IBLND_WID_INVAL 0 +#define IBLND_WID_TX 1 +#define IBLND_WID_RX 2 +#define IBLND_WID_RDMA 3 +#define IBLND_WID_MASK 3UL static inline __u64 kiblnd_ptr2wreqid(void *ptr, int type) { unsigned long lptr = (unsigned long)ptr; - LASSERT((lptr & IBLND_WID_MASK) == 0); - LASSERT((type & ~IBLND_WID_MASK) == 0); + LASSERT(!(lptr & IBLND_WID_MASK)); + LASSERT(!(type & ~IBLND_WID_MASK)); return (__u64)(lptr | type); } @@ -907,9 +948,8 @@ static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev, #define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len) struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev, - kib_rdma_desc_t *rd); -struct ib_mr *kiblnd_find_dma_mr(kib_hca_dev_t *hdev, - __u64 addr, __u64 size); + kib_rdma_desc_t *rd, + int negotiated_nfrags); void kiblnd_map_rx_descs(kib_conn_t *conn); void kiblnd_unmap_rx_descs(kib_conn_t *conn); void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node); @@ -919,11 +959,6 @@ int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages, __u64 iov, kib_fmr_t *fmr); void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status); -int kiblnd_startup(lnet_ni_t *ni); -void kiblnd_shutdown(lnet_ni_t *ni); -int kiblnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); -void kiblnd_query(struct lnet_ni *ni, lnet_nid_t nid, unsigned long *when); - int kiblnd_tunables_init(void); void kiblnd_tunables_fini(void); @@ -933,7 +968,6 @@ int kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name); int kiblnd_failover_thread(void *arg); int kiblnd_alloc_pages(kib_pages_t **pp, int cpt, int npages); -void kiblnd_free_pages(kib_pages_t *p); int kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event); @@ -942,39 +976,30 @@ int kiblnd_translate_mtu(int value); int kiblnd_dev_failover(kib_dev_t *dev); int kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid); void kiblnd_destroy_peer(kib_peer_t *peer); +bool 
kiblnd_reconnect_peer(kib_peer_t *peer); void kiblnd_destroy_dev(kib_dev_t *dev); void kiblnd_unlink_peer_locked(kib_peer_t *peer); -void kiblnd_peer_alive(kib_peer_t *peer); kib_peer_t *kiblnd_find_peer_locked(lnet_nid_t nid); -void kiblnd_peer_connect_failed(kib_peer_t *peer, int active, int error); int kiblnd_close_stale_conns_locked(kib_peer_t *peer, - int version, __u64 incarnation); + int version, __u64 incarnation); int kiblnd_close_peer_conns_locked(kib_peer_t *peer, int why); -void kiblnd_connreq_done(kib_conn_t *conn, int status); kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid, - int state, int version); -void kiblnd_destroy_conn(kib_conn_t *conn); + int state, int version); +void kiblnd_destroy_conn(kib_conn_t *conn, bool free_conn); void kiblnd_close_conn(kib_conn_t *conn, int error); void kiblnd_close_conn_locked(kib_conn_t *conn, int error); -int kiblnd_init_rdma(kib_conn_t *conn, kib_tx_t *tx, int type, - int nob, kib_rdma_desc_t *dstrd, __u64 dstcookie); - void kiblnd_launch_tx(lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid); -void kiblnd_queue_tx_locked(kib_tx_t *tx, kib_conn_t *conn); -void kiblnd_queue_tx(kib_tx_t *tx, kib_conn_t *conn); -void kiblnd_init_tx_msg(lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob); void kiblnd_txlist_done(lnet_ni_t *ni, struct list_head *txlist, - int status); -void kiblnd_check_sends (kib_conn_t *conn); + int status); void kiblnd_qp_event(struct ib_event *event, void *arg); void kiblnd_cq_event(struct ib_event *event, void *arg); void kiblnd_cq_completion(struct ib_cq *cq, void *arg); void kiblnd_pack_msg(lnet_ni_t *ni, kib_msg_t *msg, int version, - int credits, lnet_nid_t dstnid, __u64 dststamp); + int credits, lnet_nid_t dstnid, __u64 dststamp); int kiblnd_unpack_msg(kib_msg_t *msg, int nob); int kiblnd_post_rx(kib_rx_t *rx, int credit); diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c index c7b9ccb13..2323e8d3a 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -40,6 +40,15 @@ #include "o2iblnd.h" +static void kiblnd_peer_alive(kib_peer_t *peer); +static void kiblnd_peer_connect_failed(kib_peer_t *peer, int active, int error); +static void kiblnd_check_sends(kib_conn_t *conn); +static void kiblnd_init_tx_msg(lnet_ni_t *ni, kib_tx_t *tx, + int type, int body_nob); +static int kiblnd_init_rdma(kib_conn_t *conn, kib_tx_t *tx, int type, + int resid, kib_rdma_desc_t *dstrd, __u64 dstcookie); +static void kiblnd_queue_tx_locked(kib_tx_t *tx, kib_conn_t *conn); +static void kiblnd_queue_tx(kib_tx_t *tx, kib_conn_t *conn); static void kiblnd_unmap_tx(lnet_ni_t *ni, kib_tx_t *tx); static void @@ -50,12 +59,12 @@ kiblnd_tx_done(lnet_ni_t *ni, kib_tx_t *tx) int rc; int i; - LASSERT(net != NULL); + LASSERT(net); LASSERT(!in_interrupt()); LASSERT(!tx->tx_queued); /* mustn't be queued for sending */ - LASSERT(tx->tx_sending == 0); /* mustn't be awaiting sent callback */ + LASSERT(!tx->tx_sending); /* mustn't be awaiting sent callback */ LASSERT(!tx->tx_waiting); /* mustn't be awaiting peer response */ - LASSERT(tx->tx_pool != NULL); + LASSERT(tx->tx_pool); kiblnd_unmap_tx(ni, tx); @@ -64,7 +73,7 @@ kiblnd_tx_done(lnet_ni_t *ni, kib_tx_t *tx) lntmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL; rc = tx->tx_status; - if (tx->tx_conn != NULL) { + if (tx->tx_conn) { LASSERT(ni == tx->tx_conn->ibc_peer->ibp_ni); kiblnd_conn_decref(tx->tx_conn); @@ -78,7 +87,7 @@ 
kiblnd_tx_done(lnet_ni_t *ni, kib_tx_t *tx) /* delay finalize until my descs have been freed */ for (i = 0; i < 2; i++) { - if (lntmsg[i] == NULL) + if (!lntmsg[i]) continue; lnet_finalize(ni, lntmsg[i], rc); @@ -111,19 +120,19 @@ kiblnd_get_idle_tx(lnet_ni_t *ni, lnet_nid_t target) tps = net->ibn_tx_ps[lnet_cpt_of_nid(target)]; node = kiblnd_pool_alloc_node(&tps->tps_poolset); - if (node == NULL) + if (!node) return NULL; - tx = container_of(node, kib_tx_t, tx_list); + tx = list_entry(node, kib_tx_t, tx_list); - LASSERT(tx->tx_nwrq == 0); + LASSERT(!tx->tx_nwrq); LASSERT(!tx->tx_queued); - LASSERT(tx->tx_sending == 0); + LASSERT(!tx->tx_sending); LASSERT(!tx->tx_waiting); - LASSERT(tx->tx_status == 0); - LASSERT(tx->tx_conn == NULL); - LASSERT(tx->tx_lntmsg[0] == NULL); - LASSERT(tx->tx_lntmsg[1] == NULL); - LASSERT(tx->tx_nfrags == 0); + LASSERT(!tx->tx_status); + LASSERT(!tx->tx_conn); + LASSERT(!tx->tx_lntmsg[0]); + LASSERT(!tx->tx_lntmsg[1]); + LASSERT(!tx->tx_nfrags); return tx; } @@ -149,17 +158,15 @@ kiblnd_post_rx(kib_rx_t *rx, int credit) kib_conn_t *conn = rx->rx_conn; kib_net_t *net = conn->ibc_peer->ibp_ni->ni_data; struct ib_recv_wr *bad_wrq = NULL; - struct ib_mr *mr; + struct ib_mr *mr = conn->ibc_hdev->ibh_mrs; int rc; - LASSERT(net != NULL); + LASSERT(net); LASSERT(!in_interrupt()); LASSERT(credit == IBLND_POSTRX_NO_CREDIT || credit == IBLND_POSTRX_PEER_CREDIT || credit == IBLND_POSTRX_RSRVD_CREDIT); - - mr = kiblnd_find_dma_mr(conn->ibc_hdev, rx->rx_msgaddr, IBLND_MSG_SIZE); - LASSERT(mr != NULL); + LASSERT(mr); rx->rx_sge.lkey = mr->lkey; rx->rx_sge.addr = rx->rx_msgaddr; @@ -185,7 +192,7 @@ kiblnd_post_rx(kib_rx_t *rx, int credit) */ kiblnd_conn_addref(conn); rc = ib_post_recv(conn->ibc_cmid->qp, &rx->rx_wrq, &bad_wrq); - if (unlikely(rc != 0)) { + if (unlikely(rc)) { CERROR("Can't post rx for %s: %d, bad_wrq: %p\n", libcfs_nid2str(conn->ibc_peer->ibp_nid), rc, bad_wrq); rx->rx_nob = 0; @@ -194,7 +201,7 @@ kiblnd_post_rx(kib_rx_t *rx, int credit) if (conn->ibc_state < IBLND_CONN_ESTABLISHED) /* Initial post */ goto out; - if (unlikely(rc != 0)) { + if (unlikely(rc)) { kiblnd_close_conn(conn, rc); kiblnd_drop_rx(rx); /* No more posts for this rx */ goto out; @@ -225,7 +232,7 @@ kiblnd_find_waiting_tx_locked(kib_conn_t *conn, int txtype, __u64 cookie) kib_tx_t *tx = list_entry(tmp, kib_tx_t, tx_list); LASSERT(!tx->tx_queued); - LASSERT(tx->tx_sending != 0 || tx->tx_waiting); + LASSERT(tx->tx_sending || tx->tx_waiting); if (tx->tx_cookie != cookie) continue; @@ -251,7 +258,7 @@ kiblnd_handle_completion(kib_conn_t *conn, int txtype, int status, __u64 cookie) spin_lock(&conn->ibc_lock); tx = kiblnd_find_waiting_tx_locked(conn, txtype, cookie); - if (tx == NULL) { + if (!tx) { spin_unlock(&conn->ibc_lock); CWARN("Unmatched completion type %x cookie %#llx from %s\n", @@ -260,7 +267,7 @@ kiblnd_handle_completion(kib_conn_t *conn, int txtype, int status, __u64 cookie) return; } - if (tx->tx_status == 0) { /* success so far */ + if (!tx->tx_status) { /* success so far */ if (status < 0) /* failed? 
*/ tx->tx_status = status; else if (txtype == IBLND_MSG_GET_REQ) @@ -269,7 +276,7 @@ kiblnd_handle_completion(kib_conn_t *conn, int txtype, int status, __u64 cookie) tx->tx_waiting = 0; - idle = !tx->tx_queued && (tx->tx_sending == 0); + idle = !tx->tx_queued && !tx->tx_sending; if (idle) list_del(&tx->tx_list); @@ -285,7 +292,7 @@ kiblnd_send_completion(kib_conn_t *conn, int type, int status, __u64 cookie) lnet_ni_t *ni = conn->ibc_peer->ibp_ni; kib_tx_t *tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid); - if (tx == NULL) { + if (!tx) { CERROR("Can't get tx for completion %x for %s\n", type, libcfs_nid2str(conn->ibc_peer->ibp_nid)); return; @@ -316,19 +323,18 @@ kiblnd_handle_rx(kib_rx_t *rx) msg->ibm_type, credits, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - if (credits != 0) { + if (credits) { /* Have I received credits that will let me send? */ spin_lock(&conn->ibc_lock); if (conn->ibc_credits + credits > - IBLND_MSG_QUEUE_SIZE(conn->ibc_version)) { + conn->ibc_queue_depth) { rc2 = conn->ibc_credits; spin_unlock(&conn->ibc_lock); CERROR("Bad credits from %s: %d + %d > %d\n", libcfs_nid2str(conn->ibc_peer->ibp_nid), - rc2, credits, - IBLND_MSG_QUEUE_SIZE(conn->ibc_version)); + rc2, credits, conn->ibc_queue_depth); kiblnd_close_conn(conn, -EPROTO); kiblnd_post_rx(rx, IBLND_POSTRX_NO_CREDIT); @@ -360,7 +366,7 @@ kiblnd_handle_rx(kib_rx_t *rx) break; } - if (credits != 0) /* credit already posted */ + if (credits) /* credit already posted */ post_credit = IBLND_POSTRX_NO_CREDIT; else /* a keepalive NOOP */ post_credit = IBLND_POSTRX_PEER_CREDIT; @@ -396,12 +402,12 @@ kiblnd_handle_rx(kib_rx_t *rx) spin_lock(&conn->ibc_lock); tx = kiblnd_find_waiting_tx_locked(conn, IBLND_MSG_PUT_REQ, - msg->ibm_u.putack.ibpam_src_cookie); - if (tx != NULL) + msg->ibm_u.putack.ibpam_src_cookie); + if (tx) list_del(&tx->tx_list); spin_unlock(&conn->ibc_lock); - if (tx == NULL) { + if (!tx) { CERROR("Unmatched PUT_ACK from %s\n", libcfs_nid2str(conn->ibc_peer->ibp_nid)); rc = -EPROTO; @@ -409,10 +415,11 @@ kiblnd_handle_rx(kib_rx_t *rx) } LASSERT(tx->tx_waiting); - /* CAVEAT EMPTOR: I could be racing with tx_complete, but... + /* + * CAVEAT EMPTOR: I could be racing with tx_complete, but... * (a) I can overwrite tx_msg since my peer has received it! - * (b) tx_waiting set tells tx_complete() it's not done. */ - + * (b) tx_waiting set tells tx_complete() it's not done. 
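[Editor's note] The CAVEAT EMPTOR comment here encodes the tx lifetime rule used throughout this file: when the PUT_ACK arrives, the same tx that carried the PUT_REQ is re-initialised (tx_nwrq = 0) to carry the RDMA and the PUT_DONE, so tx_sending may still be non-zero from the first send. A tx may only be finalised once all three conditions clear. A minimal sketch of that rule, with an illustrative struct that merely mirrors the relevant kib_tx_t fields:

#include <stdbool.h>

/* illustrative mirror of the relevant kib_tx_t fields */
struct tx_state {
	bool queued;	/* tx_queued: on a connection send queue */
	int  sending;	/* tx_sending: posted sends minus completions */
	bool waiting;	/* tx_waiting: expecting the peer's response */
};

/* a tx may reach kiblnd_tx_done() only when all three have cleared */
static bool tx_idle(const struct tx_state *tx)
{
	return !tx->queued && !tx->sending && !tx->waiting;
}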
+ */ tx->tx_nwrq = 0; /* overwrite PUT_REQ */ rc2 = kiblnd_init_rdma(conn, tx, IBLND_MSG_PUT_DONE, @@ -469,7 +476,7 @@ kiblnd_rx_complete(kib_rx_t *rx, int status, int nob) int rc; int err = -EIO; - LASSERT(net != NULL); + LASSERT(net); LASSERT(rx->rx_nob < 0); /* was posted */ rx->rx_nob = 0; /* isn't now */ @@ -486,9 +493,9 @@ kiblnd_rx_complete(kib_rx_t *rx, int status, int nob) rx->rx_nob = nob; rc = kiblnd_unpack_msg(msg, rx->rx_nob); - if (rc != 0) { + if (rc) { CERROR("Error %d unpacking rx from %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); + rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); goto failed; } @@ -497,7 +504,7 @@ kiblnd_rx_complete(kib_rx_t *rx, int status, int nob) msg->ibm_srcstamp != conn->ibc_incarnation || msg->ibm_dststamp != net->ibn_incarnation) { CERROR("Stale rx from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); + libcfs_nid2str(conn->ibc_peer->ibp_nid)); err = -ESTALE; goto failed; } @@ -537,7 +544,7 @@ kiblnd_kvaddr_to_page(unsigned long vaddr) if (is_vmalloc_addr((void *)vaddr)) { page = vmalloc_to_page((void *)vaddr); - LASSERT(page != NULL); + LASSERT(page); return page; } #ifdef CONFIG_HIGHMEM @@ -549,7 +556,7 @@ kiblnd_kvaddr_to_page(unsigned long vaddr) } #endif page = virt_to_page(vaddr); - LASSERT(page != NULL); + LASSERT(page); return page; } @@ -565,8 +572,8 @@ kiblnd_fmr_map_tx(kib_net_t *net, kib_tx_t *tx, kib_rdma_desc_t *rd, int nob) int rc; int i; - LASSERT(tx->tx_pool != NULL); - LASSERT(tx->tx_pool->tpo_pool.po_owner != NULL); + LASSERT(tx->tx_pool); + LASSERT(tx->tx_pool->tpo_pool.po_owner); hdev = tx->tx_pool->tpo_hdev; @@ -582,13 +589,15 @@ kiblnd_fmr_map_tx(kib_net_t *net, kib_tx_t *tx, kib_rdma_desc_t *rd, int nob) fps = net->ibn_fmr_ps[cpt]; rc = kiblnd_fmr_pool_map(fps, pages, npages, 0, &tx->fmr); - if (rc != 0) { + if (rc) { CERROR("Can't map %d pages: %d\n", npages, rc); return rc; } - /* If rd is not tx_rd, it's going to get sent to a peer, who will need - * the rkey */ + /* + * If rd is not tx_rd, it's going to get sent to a peer, who will need + * the rkey + */ rd->rd_key = (rd != tx->tx_rd) ? tx->fmr.fmr_pfmr->fmr->rkey : tx->fmr.fmr_pfmr->fmr->lkey; rd->rd_frags[0].rf_addr &= ~hdev->ibh_page_mask; @@ -602,14 +611,14 @@ static void kiblnd_unmap_tx(lnet_ni_t *ni, kib_tx_t *tx) { kib_net_t *net = ni->ni_data; - LASSERT(net != NULL); + LASSERT(net); if (net->ibn_fmr_ps && tx->fmr.fmr_pfmr) { kiblnd_fmr_pool_unmap(&tx->fmr, tx->tx_status); tx->fmr.fmr_pfmr = NULL; } - if (tx->tx_nfrags != 0) { + if (tx->tx_nfrags) { kiblnd_dma_unmap_sg(tx->tx_pool->tpo_hdev->ibh_ibdev, tx->tx_frags, tx->tx_nfrags, tx->tx_dmadir); tx->tx_nfrags = 0; @@ -625,8 +634,10 @@ static int kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, __u32 nob; int i; - /* If rd is not tx_rd, it's going to get sent to a peer and I'm the - * RDMA sink */ + /* + * If rd is not tx_rd, it's going to get sent to a peer and I'm the + * RDMA sink + */ tx->tx_dmadir = (rd != tx->tx_rd) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; tx->tx_nfrags = nfrags; @@ -641,15 +652,15 @@ static int kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, nob += rd->rd_frags[i].rf_nob; } - /* looking for pre-mapping MR */ - mr = kiblnd_find_rd_dma_mr(hdev, rd); - if (mr != NULL) { + mr = kiblnd_find_rd_dma_mr(hdev, rd, tx->tx_conn ? + tx->tx_conn->ibc_max_frags : -1); + if (mr) { /* found pre-mapping MR */ rd->rd_key = (rd != tx->tx_rd) ? 
mr->rkey : mr->lkey; return 0; } - if (net->ibn_fmr_ps != NULL) + if (net->ibn_fmr_ps) return kiblnd_fmr_map_tx(net, tx, rd, nob); return -EINVAL; @@ -668,7 +679,7 @@ kiblnd_setup_rd_iov(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, LASSERT(nob > 0); LASSERT(niov > 0); - LASSERT(net != NULL); + LASSERT(net); while (offset >= iov->iov_len) { offset -= iov->iov_len; @@ -684,7 +695,7 @@ kiblnd_setup_rd_iov(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, vaddr = ((unsigned long)iov->iov_base) + offset; page_offset = vaddr & (PAGE_SIZE - 1); page = kiblnd_kvaddr_to_page(vaddr); - if (page == NULL) { + if (!page) { CERROR("Can't find page\n"); return -EFAULT; } @@ -710,7 +721,7 @@ kiblnd_setup_rd_iov(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, static int kiblnd_setup_rd_kiov(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, - int nkiov, lnet_kiov_t *kiov, int offset, int nob) + int nkiov, lnet_kiov_t *kiov, int offset, int nob) { kib_net_t *net = ni->ni_data; struct scatterlist *sg; @@ -720,7 +731,7 @@ kiblnd_setup_rd_kiov(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, LASSERT(nob > 0); LASSERT(nkiov > 0); - LASSERT(net != NULL); + LASSERT(net); while (offset >= kiov->kiov_len) { offset -= kiov->kiov_len; @@ -750,26 +761,24 @@ kiblnd_setup_rd_kiov(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, static int kiblnd_post_tx_locked(kib_conn_t *conn, kib_tx_t *tx, int credit) - __releases(conn->ibc_lock) - __acquires(conn->ibc_lock) + __must_hold(&conn->ibc_lock) { kib_msg_t *msg = tx->tx_msg; kib_peer_t *peer = conn->ibc_peer; int ver = conn->ibc_version; int rc; int done; - struct ib_send_wr *bad_wrq; LASSERT(tx->tx_queued); /* We rely on this for QP sizing */ LASSERT(tx->tx_nwrq > 0); - LASSERT(tx->tx_nwrq <= 1 + IBLND_RDMA_FRAGS(ver)); + LASSERT(tx->tx_nwrq <= 1 + conn->ibc_max_frags); - LASSERT(credit == 0 || credit == 1); + LASSERT(!credit || credit == 1); LASSERT(conn->ibc_outstanding_credits >= 0); - LASSERT(conn->ibc_outstanding_credits <= IBLND_MSG_QUEUE_SIZE(ver)); + LASSERT(conn->ibc_outstanding_credits <= conn->ibc_queue_depth); LASSERT(conn->ibc_credits >= 0); - LASSERT(conn->ibc_credits <= IBLND_MSG_QUEUE_SIZE(ver)); + LASSERT(conn->ibc_credits <= conn->ibc_queue_depth); if (conn->ibc_nsends_posted == IBLND_CONCURRENT_SENDS(ver)) { /* tx completions outstanding... 
*/ @@ -778,13 +787,13 @@ kiblnd_post_tx_locked(kib_conn_t *conn, kib_tx_t *tx, int credit) return -EAGAIN; } - if (credit != 0 && conn->ibc_credits == 0) { /* no credits */ + if (credit && !conn->ibc_credits) { /* no credits */ CDEBUG(D_NET, "%s: no credits\n", libcfs_nid2str(peer->ibp_nid)); return -EAGAIN; } - if (credit != 0 && !IBLND_OOB_CAPABLE(ver) && + if (credit && !IBLND_OOB_CAPABLE(ver) && conn->ibc_credits == 1 && /* last credit reserved */ msg->ibm_type != IBLND_MSG_NOOP) { /* for NOOP */ CDEBUG(D_NET, "%s: not using last credit\n", @@ -800,9 +809,11 @@ kiblnd_post_tx_locked(kib_conn_t *conn, kib_tx_t *tx, int credit) (!kiblnd_need_noop(conn) || /* redundant NOOP */ (IBLND_OOB_CAPABLE(ver) && /* posted enough NOOP */ conn->ibc_noops_posted == IBLND_OOB_MSGS(ver)))) { - /* OK to drop when posted enough NOOPs, since + /* + * OK to drop when posted enough NOOPs, since * kiblnd_check_sends will queue NOOP again when - * posted NOOPs complete */ + * posted NOOPs complete + */ spin_unlock(&conn->ibc_lock); kiblnd_tx_done(peer->ibp_ni, tx); spin_lock(&conn->ibc_lock); @@ -821,12 +832,14 @@ kiblnd_post_tx_locked(kib_conn_t *conn, kib_tx_t *tx, int credit) if (msg->ibm_type == IBLND_MSG_NOOP) conn->ibc_noops_posted++; - /* CAVEAT EMPTOR! This tx could be the PUT_DONE of an RDMA + /* + * CAVEAT EMPTOR! This tx could be the PUT_DONE of an RDMA * PUT. If so, it was first queued here as a PUT_REQ, sent and * stashed on ibc_active_txs, matched by an incoming PUT_ACK, * and then re-queued here. It's (just) possible that * tx_sending is non-zero if we've not done the tx_complete() - * from the first send; hence the ++ rather than = below. */ + * from the first send; hence the ++ rather than = below. + */ tx->tx_sending++; list_add(&tx->tx_list, &conn->ibc_active_txs); @@ -838,16 +851,25 @@ kiblnd_post_tx_locked(kib_conn_t *conn, kib_tx_t *tx, int credit) /* close_conn will launch failover */ rc = -ENETDOWN; } else { - rc = ib_post_send(conn->ibc_cmid->qp, &tx->tx_wrq->wr, &bad_wrq); + struct ib_send_wr *wrq = &tx->tx_wrq[tx->tx_nwrq - 1].wr; + + LASSERTF(wrq->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX), + "bad wr_id %llx, opc %d, flags %d, peer: %s\n", + wrq->wr_id, wrq->opcode, wrq->send_flags, + libcfs_nid2str(conn->ibc_peer->ibp_nid)); + wrq = NULL; + rc = ib_post_send(conn->ibc_cmid->qp, &tx->tx_wrq->wr, &wrq); } conn->ibc_last_send = jiffies; - if (rc == 0) + if (!rc) return 0; - /* NB credits are transferred in the actual - * message, which can only be the last work item */ + /* + * NB credits are transferred in the actual + * message, which can only be the last work item + */ conn->ibc_credits += credit; conn->ibc_outstanding_credits += msg->ibm_credits; conn->ibc_nsends_posted--; @@ -858,7 +880,7 @@ kiblnd_post_tx_locked(kib_conn_t *conn, kib_tx_t *tx, int credit) tx->tx_waiting = 0; tx->tx_sending--; - done = (tx->tx_sending == 0); + done = !tx->tx_sending; if (done) list_del(&tx->tx_list); @@ -881,7 +903,7 @@ kiblnd_post_tx_locked(kib_conn_t *conn, kib_tx_t *tx, int credit) return -EIO; } -void +static void kiblnd_check_sends(kib_conn_t *conn) { int ver = conn->ibc_version; @@ -899,13 +921,13 @@ kiblnd_check_sends(kib_conn_t *conn) LASSERT(conn->ibc_nsends_posted <= IBLND_CONCURRENT_SENDS(ver)); LASSERT(!IBLND_OOB_CAPABLE(ver) || - conn->ibc_noops_posted <= IBLND_OOB_MSGS(ver)); + conn->ibc_noops_posted <= IBLND_OOB_MSGS(ver)); LASSERT(conn->ibc_reserved_credits >= 0); while (conn->ibc_reserved_credits > 0 && !list_empty(&conn->ibc_tx_queue_rsrvd)) { tx = 
list_entry(conn->ibc_tx_queue_rsrvd.next, - kib_tx_t, tx_list); + kib_tx_t, tx_list); list_del(&tx->tx_list); list_add_tail(&tx->tx_list, &conn->ibc_tx_queue); conn->ibc_reserved_credits--; @@ -915,23 +937,21 @@ kiblnd_check_sends(kib_conn_t *conn) spin_unlock(&conn->ibc_lock); tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid); - if (tx != NULL) + if (tx) kiblnd_init_tx_msg(ni, tx, IBLND_MSG_NOOP, 0); spin_lock(&conn->ibc_lock); - if (tx != NULL) + if (tx) kiblnd_queue_tx_locked(tx, conn); } - kiblnd_conn_addref(conn); /* 1 ref for me.... (see b21911) */ - for (;;) { int credit; if (!list_empty(&conn->ibc_tx_queue_nocred)) { credit = 0; tx = list_entry(conn->ibc_tx_queue_nocred.next, - kib_tx_t, tx_list); + kib_tx_t, tx_list); } else if (!list_empty(&conn->ibc_tx_noops)) { LASSERT(!IBLND_OOB_CAPABLE(ver)); credit = 1; @@ -940,17 +960,16 @@ kiblnd_check_sends(kib_conn_t *conn) } else if (!list_empty(&conn->ibc_tx_queue)) { credit = 1; tx = list_entry(conn->ibc_tx_queue.next, - kib_tx_t, tx_list); - } else + kib_tx_t, tx_list); + } else { break; + } - if (kiblnd_post_tx_locked(conn, tx, credit) != 0) + if (kiblnd_post_tx_locked(conn, tx, credit)) break; } spin_unlock(&conn->ibc_lock); - - kiblnd_conn_decref(conn); /* ...until here */ } static void @@ -976,9 +995,10 @@ kiblnd_tx_complete(kib_tx_t *tx, int status) spin_lock(&conn->ibc_lock); - /* I could be racing with rdma completion. Whoever makes 'tx' idle - * gets to free it, which also drops its ref on 'conn'. */ - + /* + * I could be racing with rdma completion. Whoever makes 'tx' idle + * gets to free it, which also drops its ref on 'conn'. + */ tx->tx_sending--; conn->ibc_nsends_posted--; if (tx->tx_msg->ibm_type == IBLND_MSG_NOOP) @@ -989,7 +1009,7 @@ kiblnd_tx_complete(kib_tx_t *tx, int status) tx->tx_status = -EIO; } - idle = (tx->tx_sending == 0) && /* This is the final callback */ + idle = !tx->tx_sending && /* This is the final callback */ !tx->tx_waiting && /* Not waiting for peer */ !tx->tx_queued; /* Not re-queued (PUT_DONE) */ if (idle) @@ -1007,24 +1027,22 @@ kiblnd_tx_complete(kib_tx_t *tx, int status) kiblnd_conn_decref(conn); /* ...until here */ } -void +static void kiblnd_init_tx_msg(lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob) { kib_hca_dev_t *hdev = tx->tx_pool->tpo_hdev; struct ib_sge *sge = &tx->tx_sge[tx->tx_nwrq]; struct ib_rdma_wr *wrq = &tx->tx_wrq[tx->tx_nwrq]; int nob = offsetof(kib_msg_t, ibm_u) + body_nob; - struct ib_mr *mr; + struct ib_mr *mr = hdev->ibh_mrs; LASSERT(tx->tx_nwrq >= 0); LASSERT(tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1); LASSERT(nob <= IBLND_MSG_SIZE); + LASSERT(mr); kiblnd_init_msg(tx->tx_msg, type, body_nob); - mr = kiblnd_find_dma_mr(hdev, tx->tx_msgaddr, nob); - LASSERT(mr != NULL); - sge->lkey = mr->lkey; sge->addr = tx->tx_msgaddr; sge->length = nob; @@ -1041,25 +1059,23 @@ kiblnd_init_tx_msg(lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob) tx->tx_nwrq++; } -int +static int kiblnd_init_rdma(kib_conn_t *conn, kib_tx_t *tx, int type, - int resid, kib_rdma_desc_t *dstrd, __u64 dstcookie) + int resid, kib_rdma_desc_t *dstrd, __u64 dstcookie) { kib_msg_t *ibmsg = tx->tx_msg; kib_rdma_desc_t *srcrd = tx->tx_rd; struct ib_sge *sge = &tx->tx_sge[0]; struct ib_rdma_wr *wrq = &tx->tx_wrq[0], *next; int rc = resid; - int srcidx; - int dstidx; + int srcidx = 0; + int dstidx = 0; int wrknob; LASSERT(!in_interrupt()); - LASSERT(tx->tx_nwrq == 0); + LASSERT(!tx->tx_nwrq); LASSERT(type == IBLND_MSG_GET_DONE || - type == IBLND_MSG_PUT_DONE); - - srcidx = dstidx = 0; + type == 
IBLND_MSG_PUT_DONE); while (resid > 0) { if (srcidx >= srcrd->rd_nfrags) { @@ -1074,10 +1090,10 @@ kiblnd_init_rdma(kib_conn_t *conn, kib_tx_t *tx, int type, break; } - if (tx->tx_nwrq == IBLND_RDMA_FRAGS(conn->ibc_version)) { - CERROR("RDMA too fragmented for %s (%d): %d/%d src %d/%d dst frags\n", + if (tx->tx_nwrq >= conn->ibc_max_frags) { + CERROR("RDMA has too many fragments for peer %s (%d), src idx/frags: %d/%d dst idx/frags: %d/%d\n", libcfs_nid2str(conn->ibc_peer->ibp_nid), - IBLND_RDMA_FRAGS(conn->ibc_version), + conn->ibc_max_frags, srcidx, srcrd->rd_nfrags, dstidx, dstrd->rd_nfrags); rc = -EMSGSIZE; @@ -1127,7 +1143,7 @@ kiblnd_init_rdma(kib_conn_t *conn, kib_tx_t *tx, int type, return rc; } -void +static void kiblnd_queue_tx_locked(kib_tx_t *tx, kib_conn_t *conn) { struct list_head *q; @@ -1137,9 +1153,11 @@ kiblnd_queue_tx_locked(kib_tx_t *tx, kib_conn_t *conn) LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED); tx->tx_queued = 1; - tx->tx_deadline = jiffies + (*kiblnd_tunables.kib_timeout * HZ); + tx->tx_deadline = jiffies + + msecs_to_jiffies(*kiblnd_tunables.kib_timeout * + MSEC_PER_SEC); - if (tx->tx_conn == NULL) { + if (!tx->tx_conn) { kiblnd_conn_addref(conn); tx->tx_conn = conn; LASSERT(tx->tx_msg->ibm_type != IBLND_MSG_PUT_DONE); @@ -1180,7 +1198,7 @@ kiblnd_queue_tx_locked(kib_tx_t *tx, kib_conn_t *conn) list_add_tail(&tx->tx_list, q); } -void +static void kiblnd_queue_tx(kib_tx_t *tx, kib_conn_t *conn) { spin_lock(&conn->ibc_lock); @@ -1200,19 +1218,19 @@ static int kiblnd_resolve_addr(struct rdma_cm_id *cmid, /* allow the port to be reused */ rc = rdma_set_reuseaddr(cmid, 1); - if (rc != 0) { + if (rc) { CERROR("Unable to set reuse on cmid: %d\n", rc); return rc; } /* look for a free privileged port */ - for (port = PROT_SOCK-1; port > 0; port--) { + for (port = PROT_SOCK - 1; port > 0; port--) { srcaddr->sin_port = htons(port); rc = rdma_resolve_addr(cmid, (struct sockaddr *)srcaddr, (struct sockaddr *)dstaddr, timeout_ms); - if (rc == 0) { + if (!rc) { CDEBUG(D_NET, "bound to port %hu\n", port); return 0; } else if (rc == -EADDRINUSE || rc == -EADDRNOTAVAIL) { @@ -1237,8 +1255,9 @@ kiblnd_connect_peer(kib_peer_t *peer) struct sockaddr_in dstaddr; int rc; - LASSERT(net != NULL); + LASSERT(net); LASSERT(peer->ibp_connecting > 0); + LASSERT(!peer->ibp_reconnecting); cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, peer, RDMA_PS_TCP, IB_QPT_RC); @@ -1271,14 +1290,14 @@ kiblnd_connect_peer(kib_peer_t *peer) (struct sockaddr *)&dstaddr, *kiblnd_tunables.kib_timeout * 1000); } - if (rc != 0) { + if (rc) { /* Can't initiate address resolution: */ CERROR("Can't resolve addr for %s: %d\n", libcfs_nid2str(peer->ibp_nid), rc); goto failed2; } - LASSERT(cmid->device != NULL); + LASSERT(cmid->device); CDEBUG(D_NET, "%s: connection bound to %s:%pI4h:%s\n", libcfs_nid2str(peer->ibp_nid), dev->ibd_ifname, &dev->ibd_ifip, cmid->device->name); @@ -1286,12 +1305,64 @@ kiblnd_connect_peer(kib_peer_t *peer) return; failed2: + kiblnd_peer_connect_failed(peer, 1, rc); kiblnd_peer_decref(peer); /* cmid's ref */ rdma_destroy_id(cmid); + return; failed: kiblnd_peer_connect_failed(peer, 1, rc); } +bool +kiblnd_reconnect_peer(kib_peer_t *peer) +{ + rwlock_t *glock = &kiblnd_data.kib_global_lock; + char *reason = NULL; + struct list_head txs; + unsigned long flags; + + INIT_LIST_HEAD(&txs); + + write_lock_irqsave(glock, flags); + if (!peer->ibp_reconnecting) { + if (peer->ibp_accepting) + reason = "accepting"; + else if (peer->ibp_connecting) + reason = "connecting"; + else if 
(!list_empty(&peer->ibp_conns)) + reason = "connected"; + else /* connected then closed */ + reason = "closed"; + + goto no_reconnect; + } + + LASSERT(!peer->ibp_accepting && !peer->ibp_connecting && + list_empty(&peer->ibp_conns)); + peer->ibp_reconnecting = 0; + + if (!kiblnd_peer_active(peer)) { + list_splice_init(&peer->ibp_tx_queue, &txs); + reason = "unlinked"; + goto no_reconnect; + } + + peer->ibp_connecting++; + peer->ibp_reconnected++; + write_unlock_irqrestore(glock, flags); + + kiblnd_connect_peer(peer); + return true; + +no_reconnect: + write_unlock_irqrestore(glock, flags); + + CWARN("Abort reconnection of %s: %s\n", + libcfs_nid2str(peer->ibp_nid), reason); + kiblnd_txlist_done(peer->ibp_ni, &txs, -ECONNABORTED); + return false; +} + void kiblnd_launch_tx(lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid) { @@ -1302,25 +1373,28 @@ kiblnd_launch_tx(lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid) unsigned long flags; int rc; - /* If I get here, I've committed to send, so I complete the tx with - * failure on any problems */ - - LASSERT(tx == NULL || tx->tx_conn == NULL); /* only set when assigned a conn */ - LASSERT(tx == NULL || tx->tx_nwrq > 0); /* work items have been set up */ + /* + * If I get here, I've committed to send, so I complete the tx with + * failure on any problems + */ + LASSERT(!tx || !tx->tx_conn); /* only set when assigned a conn */ + LASSERT(!tx || tx->tx_nwrq > 0); /* work items have been set up */ - /* First time, just use a read lock since I expect to find my peer - * connected */ + /* + * First time, just use a read lock since I expect to find my peer + * connected + */ read_lock_irqsave(g_lock, flags); peer = kiblnd_find_peer_locked(nid); - if (peer != NULL && !list_empty(&peer->ibp_conns)) { + if (peer && !list_empty(&peer->ibp_conns)) { /* Found a peer with an established connection */ conn = kiblnd_get_conn_locked(peer); kiblnd_conn_addref(conn); /* 1 ref for me... */ read_unlock_irqrestore(g_lock, flags); - if (tx != NULL) + if (tx) kiblnd_queue_tx(tx, conn); kiblnd_conn_decref(conn); /* ...to here */ return; @@ -1331,14 +1405,13 @@ kiblnd_launch_tx(lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid) write_lock(g_lock); peer = kiblnd_find_peer_locked(nid); - if (peer != NULL) { + if (peer) { if (list_empty(&peer->ibp_conns)) { /* found a peer, but it's still connecting... */ - LASSERT(peer->ibp_connecting != 0 || - peer->ibp_accepting != 0); - if (tx != NULL) + LASSERT(kiblnd_peer_connecting(peer)); + if (tx) list_add_tail(&tx->tx_list, - &peer->ibp_tx_queue); + &peer->ibp_tx_queue); write_unlock_irqrestore(g_lock, flags); } else { conn = kiblnd_get_conn_locked(peer); @@ -1346,7 +1419,7 @@ kiblnd_launch_tx(lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid) write_unlock_irqrestore(g_lock, flags); - if (tx != NULL) + if (tx) kiblnd_queue_tx(tx, conn); kiblnd_conn_decref(conn); /* ...to here */ } @@ -1357,9 +1430,9 @@ kiblnd_launch_tx(lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid) /* Allocate a peer ready to add to the peer table and retry */ rc = kiblnd_create_peer(ni, &peer, nid); - if (rc != 0) { + if (rc) { CERROR("Can't create peer %s\n", libcfs_nid2str(nid)); - if (tx != NULL) { + if (tx) { tx->tx_status = -EHOSTUNREACH; tx->tx_waiting = 0; kiblnd_tx_done(ni, tx); @@ -1370,14 +1443,13 @@ kiblnd_launch_tx(lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid) write_lock_irqsave(g_lock, flags); peer2 = kiblnd_find_peer_locked(nid); - if (peer2 != NULL) { + if (peer2) { if (list_empty(&peer2->ibp_conns)) { /* found a peer, but it's still connecting... 
*/ - LASSERT(peer2->ibp_connecting != 0 || - peer2->ibp_accepting != 0); - if (tx != NULL) + LASSERT(kiblnd_peer_connecting(peer2)); + if (tx) list_add_tail(&tx->tx_list, - &peer2->ibp_tx_queue); + &peer2->ibp_tx_queue); write_unlock_irqrestore(g_lock, flags); } else { conn = kiblnd_get_conn_locked(peer2); @@ -1385,7 +1457,7 @@ kiblnd_launch_tx(lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid) write_unlock_irqrestore(g_lock, flags); - if (tx != NULL) + if (tx) kiblnd_queue_tx(tx, conn); kiblnd_conn_decref(conn); /* ...to here */ } @@ -1395,13 +1467,13 @@ kiblnd_launch_tx(lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid) } /* Brand new peer */ - LASSERT(peer->ibp_connecting == 0); + LASSERT(!peer->ibp_connecting); peer->ibp_connecting = 1; /* always called with a ref on ni, which prevents ni being shutdown */ - LASSERT(((kib_net_t *)ni->ni_data)->ibn_shutdown == 0); + LASSERT(!((kib_net_t *)ni->ni_data)->ibn_shutdown); - if (tx != NULL) + if (tx) list_add_tail(&tx->tx_list, &peer->ibp_tx_queue); kiblnd_peer_addref(peer); @@ -1437,13 +1509,13 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n", payload_nob, payload_niov, libcfs_id2str(target)); - LASSERT(payload_nob == 0 || payload_niov > 0); + LASSERT(!payload_nob || payload_niov > 0); LASSERT(payload_niov <= LNET_MAX_IOV); /* Thread context */ LASSERT(!in_interrupt()); /* payload is either all vaddrs or all pages */ - LASSERT(!(payload_kiov != NULL && payload_iov != NULL)); + LASSERT(!(payload_kiov && payload_iov)); switch (type) { default: @@ -1451,7 +1523,7 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) return -EIO; case LNET_MSG_ACK: - LASSERT(payload_nob == 0); + LASSERT(!payload_nob); break; case LNET_MSG_GET: @@ -1464,7 +1536,7 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) break; /* send IMMEDIATE */ tx = kiblnd_get_idle_tx(ni, target.nid); - if (tx == NULL) { + if (!tx) { CERROR("Can't allocate txd for GET to %s\n", libcfs_nid2str(target.nid)); return -ENOMEM; @@ -1472,7 +1544,7 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) ibmsg = tx->tx_msg; rd = &ibmsg->ibm_u.get.ibgm_rd; - if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0) + if (!(lntmsg->msg_md->md_options & LNET_MD_KIOV)) rc = kiblnd_setup_rd_iov(ni, tx, rd, lntmsg->msg_md->md_niov, lntmsg->msg_md->md_iov.iov, @@ -1482,7 +1554,7 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) lntmsg->msg_md->md_niov, lntmsg->msg_md->md_iov.kiov, 0, lntmsg->msg_md->md_length); - if (rc != 0) { + if (rc) { CERROR("Can't setup GET sink for %s: %d\n", libcfs_nid2str(target.nid), rc); kiblnd_tx_done(ni, tx); @@ -1496,7 +1568,7 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) kiblnd_init_tx_msg(ni, tx, IBLND_MSG_GET_REQ, nob); tx->tx_lntmsg[1] = lnet_create_reply_msg(ni, lntmsg); - if (tx->tx_lntmsg[1] == NULL) { + if (!tx->tx_lntmsg[1]) { CERROR("Can't create reply for GET -> %s\n", libcfs_nid2str(target.nid)); kiblnd_tx_done(ni, tx); @@ -1516,14 +1588,14 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) break; /* send IMMEDIATE */ tx = kiblnd_get_idle_tx(ni, target.nid); - if (tx == NULL) { + if (!tx) { CERROR("Can't allocate %s txd for %s\n", type == LNET_MSG_PUT ? 
"PUT" : "REPLY", libcfs_nid2str(target.nid)); return -ENOMEM; } - if (payload_kiov == NULL) + if (!payload_kiov) rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd, payload_niov, payload_iov, payload_offset, payload_nob); @@ -1531,7 +1603,7 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd, payload_niov, payload_kiov, payload_offset, payload_nob); - if (rc != 0) { + if (rc) { CERROR("Can't setup PUT src for %s: %d\n", libcfs_nid2str(target.nid), rc); kiblnd_tx_done(ni, tx); @@ -1555,16 +1627,16 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) <= IBLND_MSG_SIZE); tx = kiblnd_get_idle_tx(ni, target.nid); - if (tx == NULL) { + if (!tx) { CERROR("Can't send %d to %s: tx descs exhausted\n", - type, libcfs_nid2str(target.nid)); + type, libcfs_nid2str(target.nid)); return -ENOMEM; } ibmsg = tx->tx_msg; ibmsg->ibm_u.immediate.ibim_hdr = *hdr; - if (payload_kiov != NULL) + if (payload_kiov) lnet_copy_kiov2flat(IBLND_MSG_SIZE, ibmsg, offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), payload_niov, payload_kiov, @@ -1596,22 +1668,22 @@ kiblnd_reply(lnet_ni_t *ni, kib_rx_t *rx, lnet_msg_t *lntmsg) int rc; tx = kiblnd_get_idle_tx(ni, rx->rx_conn->ibc_peer->ibp_nid); - if (tx == NULL) { + if (!tx) { CERROR("Can't get tx for REPLY to %s\n", libcfs_nid2str(target.nid)); goto failed_0; } - if (nob == 0) + if (!nob) rc = 0; - else if (kiov == NULL) + else if (!kiov) rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd, niov, iov, offset, nob); else rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd, niov, kiov, offset, nob); - if (rc != 0) { + if (rc) { CERROR("Can't setup GET src for %s: %d\n", libcfs_nid2str(target.nid), rc); goto failed_1; @@ -1627,12 +1699,11 @@ kiblnd_reply(lnet_ni_t *ni, kib_rx_t *rx, lnet_msg_t *lntmsg) goto failed_1; } - if (nob == 0) { + if (!nob) { /* No RDMA: local completion may happen now! 
*/ lnet_finalize(ni, lntmsg, 0); } else { - /* RDMA: lnet_finalize(lntmsg) when it - * completes */ + /* RDMA: lnet_finalize(lntmsg) when it completes */ tx->tx_lntmsg[0] = lntmsg; } @@ -1647,8 +1718,8 @@ kiblnd_reply(lnet_ni_t *ni, kib_rx_t *rx, lnet_msg_t *lntmsg) int kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, - unsigned int niov, struct kvec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) + unsigned int niov, struct kvec *iov, lnet_kiov_t *kiov, + unsigned int offset, unsigned int mlen, unsigned int rlen) { kib_rx_t *rx = private; kib_msg_t *rxmsg = rx->rx_msg; @@ -1661,7 +1732,7 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, LASSERT(mlen <= rlen); LASSERT(!in_interrupt()); /* Either all pages or all vaddrs */ - LASSERT(!(kiov != NULL && iov != NULL)); + LASSERT(!(kiov && iov)); switch (rxmsg->ibm_type) { default: @@ -1671,13 +1742,13 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[rlen]); if (nob > rx->rx_nob) { CERROR("Immediate message from %s too big: %d(%d)\n", - libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid), - nob, rx->rx_nob); + libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid), + nob, rx->rx_nob); rc = -EPROTO; break; } - if (kiov != NULL) + if (kiov) lnet_copy_flat2kiov(niov, kiov, offset, IBLND_MSG_SIZE, rxmsg, offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), @@ -1694,7 +1765,7 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, kib_msg_t *txmsg; kib_rdma_desc_t *rd; - if (mlen == 0) { + if (!mlen) { lnet_finalize(ni, lntmsg, 0); kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, 0, rxmsg->ibm_u.putreq.ibprm_cookie); @@ -1702,7 +1773,7 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, } tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid); - if (tx == NULL) { + if (!tx) { CERROR("Can't allocate tx for %s\n", libcfs_nid2str(conn->ibc_peer->ibp_nid)); /* Not replying will break the connection */ @@ -1712,13 +1783,13 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, txmsg = tx->tx_msg; rd = &txmsg->ibm_u.putack.ibpam_rd; - if (kiov == NULL) + if (!kiov) rc = kiblnd_setup_rd_iov(ni, tx, rd, niov, iov, offset, mlen); else rc = kiblnd_setup_rd_kiov(ni, tx, rd, niov, kiov, offset, mlen); - if (rc != 0) { + if (rc) { CERROR("Can't setup PUT sink for %s: %d\n", libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); kiblnd_tx_done(ni, tx); @@ -1744,7 +1815,7 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, } case IBLND_MSG_GET_REQ: - if (lntmsg != NULL) { + if (lntmsg) { /* Optimized GET; RDMA lntmsg's payload */ kiblnd_reply(ni, rx, lntmsg); } else { @@ -1778,7 +1849,7 @@ kiblnd_thread_fini(void) atomic_dec(&kiblnd_data.kib_nthreads); } -void +static void kiblnd_peer_alive(kib_peer_t *peer) { /* This is racy, but everyone's only writing cfs_time_current() */ @@ -1795,10 +1866,7 @@ kiblnd_peer_notify(kib_peer_t *peer) read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - if (list_empty(&peer->ibp_conns) && - peer->ibp_accepting == 0 && - peer->ibp_connecting == 0 && - peer->ibp_error != 0) { + if (kiblnd_peer_idle(peer) && peer->ibp_error) { error = peer->ibp_error; peer->ibp_error = 0; @@ -1807,7 +1875,7 @@ kiblnd_peer_notify(kib_peer_t *peer) read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - if (error != 0) + if (error) lnet_notify(peer->ibp_ni, peer->ibp_nid, 0, 
last_alive); } @@ -1815,25 +1883,27 @@ kiblnd_peer_notify(kib_peer_t *peer) void kiblnd_close_conn_locked(kib_conn_t *conn, int error) { - /* This just does the immediate housekeeping. 'error' is zero for a + /* + * This just does the immediate housekeeping. 'error' is zero for a * normal shutdown which can happen only after the connection has been * established. If the connection is established, schedule the - * connection to be finished off by the connd. Otherwise the connd is + * connection to be finished off by the connd. Otherwise the connd is * already dealing with it (either to set it up or tear it down). - * Caller holds kib_global_lock exclusively in irq context */ + * Caller holds kib_global_lock exclusively in irq context + */ kib_peer_t *peer = conn->ibc_peer; kib_dev_t *dev; unsigned long flags; - LASSERT(error != 0 || conn->ibc_state >= IBLND_CONN_ESTABLISHED); + LASSERT(error || conn->ibc_state >= IBLND_CONN_ESTABLISHED); - if (error != 0 && conn->ibc_comms_error == 0) + if (error && !conn->ibc_comms_error) conn->ibc_comms_error = error; if (conn->ibc_state != IBLND_CONN_ESTABLISHED) return; /* already being handled */ - if (error == 0 && + if (!error && list_empty(&conn->ibc_tx_noops) && list_empty(&conn->ibc_tx_queue) && list_empty(&conn->ibc_tx_queue_rsrvd) && @@ -1843,12 +1913,12 @@ kiblnd_close_conn_locked(kib_conn_t *conn, int error) libcfs_nid2str(peer->ibp_nid)); } else { CNETERR("Closing conn to %s: error %d%s%s%s%s%s\n", - libcfs_nid2str(peer->ibp_nid), error, - list_empty(&conn->ibc_tx_queue) ? "" : "(sending)", - list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)", - list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)", - list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)", - list_empty(&conn->ibc_active_txs) ? "" : "(waiting)"); + libcfs_nid2str(peer->ibp_nid), error, + list_empty(&conn->ibc_tx_queue) ? "" : "(sending)", + list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)", + list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)", + list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)", + list_empty(&conn->ibc_active_txs) ? "" : "(waiting)"); } dev = ((kib_net_t *)peer->ibp_ni->ni_data)->ibn_dev; @@ -1865,7 +1935,7 @@ kiblnd_close_conn_locked(kib_conn_t *conn, int error) kiblnd_set_conn_state(conn, IBLND_CONN_CLOSING); - if (error != 0 && + if (error && kiblnd_dev_can_failover(dev)) { list_add_tail(&dev->ibd_fail_list, &kiblnd_data.kib_failed_devs); @@ -1929,8 +1999,7 @@ kiblnd_abort_txs(kib_conn_t *conn, struct list_head *txs) if (txs == &conn->ibc_active_txs) { LASSERT(!tx->tx_queued); - LASSERT(tx->tx_waiting || - tx->tx_sending != 0); + LASSERT(tx->tx_waiting || tx->tx_sending); } else { LASSERT(tx->tx_queued); } @@ -1938,7 +2007,7 @@ kiblnd_abort_txs(kib_conn_t *conn, struct list_head *txs) tx->tx_status = -ECONNABORTED; tx->tx_waiting = 0; - if (tx->tx_sending == 0) { + if (!tx->tx_sending) { tx->tx_queued = 0; list_del(&tx->tx_list); list_add(&tx->tx_list, &zombies); @@ -1958,14 +2027,17 @@ kiblnd_finalise_conn(kib_conn_t *conn) kiblnd_set_conn_state(conn, IBLND_CONN_DISCONNECTED); - /* abort_receives moves QP state to IB_QPS_ERR. This is only required + /* + * abort_receives moves QP state to IB_QPS_ERR. This is only required * for connections that didn't get as far as being connected, because - * rdma_disconnect() does this for free. */ + * rdma_disconnect() does this for free. + */ kiblnd_abort_receives(conn); - /* Complete all tx descs not waiting for sends to complete. 
- * NB we should be safe from RDMA now that the QP has changed state */ - + /* + * Complete all tx descs not waiting for sends to complete. + * NB we should be safe from RDMA now that the QP has changed state + */ kiblnd_abort_txs(conn, &conn->ibc_tx_noops); kiblnd_abort_txs(conn, &conn->ibc_tx_queue); kiblnd_abort_txs(conn, &conn->ibc_tx_queue_rsrvd); @@ -1975,13 +2047,13 @@ kiblnd_finalise_conn(kib_conn_t *conn) kiblnd_handle_early_rxs(conn); } -void +static void kiblnd_peer_connect_failed(kib_peer_t *peer, int active, int error) { LIST_HEAD(zombies); unsigned long flags; - LASSERT(error != 0); + LASSERT(error); LASSERT(!in_interrupt()); write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); @@ -1994,14 +2066,14 @@ kiblnd_peer_connect_failed(kib_peer_t *peer, int active, int error) peer->ibp_accepting--; } - if (peer->ibp_connecting != 0 || - peer->ibp_accepting != 0) { + if (kiblnd_peer_connecting(peer)) { /* another connection attempt under way... */ write_unlock_irqrestore(&kiblnd_data.kib_global_lock, - flags); + flags); return; } + peer->ibp_reconnected = 0; if (list_empty(&peer->ibp_conns)) { /* Take peer's blocked transmits to complete with error */ list_add(&zombies, &peer->ibp_tx_queue); @@ -2029,7 +2101,7 @@ kiblnd_peer_connect_failed(kib_peer_t *peer, int active, int error) kiblnd_txlist_done(peer->ibp_ni, &zombies, -EHOSTUNREACH); } -void +static void kiblnd_connreq_done(kib_conn_t *conn, int status) { kib_peer_t *peer = conn->ibc_peer; @@ -2047,14 +2119,14 @@ kiblnd_connreq_done(kib_conn_t *conn, int status) LASSERT(!in_interrupt()); LASSERT((conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT && - peer->ibp_connecting > 0) || + peer->ibp_connecting > 0) || (conn->ibc_state == IBLND_CONN_PASSIVE_WAIT && - peer->ibp_accepting > 0)); + peer->ibp_accepting > 0)); LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars)); conn->ibc_connvars = NULL; - if (status != 0) { + if (status) { /* failed to establish connection */ kiblnd_peer_connect_failed(peer, active, status); kiblnd_finalise_conn(conn); @@ -2068,16 +2140,19 @@ kiblnd_connreq_done(kib_conn_t *conn, int status) kiblnd_set_conn_state(conn, IBLND_CONN_ESTABLISHED); kiblnd_peer_alive(peer); - /* Add conn to peer's list and nuke any dangling conns from a different - * peer instance... */ + /* + * Add conn to peer's list and nuke any dangling conns from a different + * peer instance... + */ kiblnd_conn_addref(conn); /* +1 ref for ibc_list */ list_add(&conn->ibc_list, &peer->ibp_conns); + peer->ibp_reconnected = 0; if (active) peer->ibp_connecting--; else peer->ibp_accepting--; - if (peer->ibp_version == 0) { + if (!peer->ibp_version) { peer->ibp_version = conn->ibc_version; peer->ibp_incarnation = conn->ibc_incarnation; } @@ -2095,7 +2170,7 @@ kiblnd_connreq_done(kib_conn_t *conn, int status) list_del_init(&peer->ibp_tx_queue); if (!kiblnd_peer_active(peer) || /* peer has been deleted */ - conn->ibc_comms_error != 0) { /* error has happened already */ + conn->ibc_comms_error) { /* error has happened already */ lnet_ni_t *ni = peer->ibp_ni; /* start to shut down connection */ @@ -2107,6 +2182,16 @@ kiblnd_connreq_done(kib_conn_t *conn, int status) return; } + /** + * refcount taken by cmid is not reliable after I released the glock + * because this connection is visible to other threads now, another + * thread can find and close this connection right after I released + * the glock, if kiblnd_cm_callback for RDMA_CM_EVENT_DISCONNECTED is + * called, it can release the connection refcount taken by cmid. 
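[Editor's note] The comment block added here documents why kiblnd_connreq_done() now pins the connection before releasing the global lock: once the conn is on the peer's list it is visible to other threads, and the reference held via the cmid can be dropped by a DISCONNECTED callback before the post-connect work finishes. This is the standard refcount-across-unlock pattern; a generic sketch under illustrative names (obj_get/obj_put are not the driver's helpers):

#include <stdatomic.h>

struct obj {
	atomic_int refs;
};

static void obj_get(struct obj *o)
{
	atomic_fetch_add(&o->refs, 1);
}

/* returns nonzero when the caller dropped the last reference */
static int obj_put(struct obj *o)
{
	return atomic_fetch_sub(&o->refs, 1) == 1;
}

/*
 * lock(l);
 * ...make obj reachable to other threads...
 * obj_get(o);	// pin: others may close/free obj once we unlock
 * unlock(l);
 * do_work(o);	// safe: our pin keeps obj alive
 * obj_put(o);	// possibly the final put
 */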
+ * It means the connection could be destroyed before I finish my + * operations on it. + */ + kiblnd_conn_addref(conn); write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); /* Schedule blocked txs */ @@ -2122,6 +2207,8 @@ kiblnd_connreq_done(kib_conn_t *conn, int status) /* schedule blocked rxs */ kiblnd_handle_early_rxs(conn); + + kiblnd_conn_decref(conn); } static void @@ -2131,7 +2218,7 @@ kiblnd_reject(struct rdma_cm_id *cmid, kib_rej_t *rej) rc = rdma_reject(cmid, rej, sizeof(*rej)); - if (rc != 0) + if (rc) CWARN("Error %d sending reject\n", rc); } @@ -2159,14 +2246,14 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) /* cmid inherits 'context' from the corresponding listener id */ ibdev = (kib_dev_t *)cmid->context; - LASSERT(ibdev != NULL); + LASSERT(ibdev); memset(&rej, 0, sizeof(rej)); rej.ibr_magic = IBLND_MSG_MAGIC; rej.ibr_why = IBLND_REJECT_FATAL; rej.ibr_cp.ibcp_max_msg_size = IBLND_MSG_SIZE; - peer_addr = (struct sockaddr_in *)&(cmid->route.addr.dst_addr); + peer_addr = (struct sockaddr_in *)&cmid->route.addr.dst_addr; if (*kiblnd_tunables.kib_require_priv_port && ntohs(peer_addr->sin_port) >= PROT_SOCK) { __u32 ip = ntohl(peer_addr->sin_addr.s_addr); @@ -2181,12 +2268,14 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) goto failed; } - /* Future protocol version compatibility support! If the + /* + * Future protocol version compatibility support! If the * o2iblnd-specific protocol changes, or when LNET unifies * protocols over all LNDs, the initial connection will * negotiate a protocol version. I trap this here to avoid * console errors; the reject tells the peer which protocol I - * speak. */ + * speak. + */ if (reqmsg->ibm_magic == LNET_PROTO_MAGIC || reqmsg->ibm_magic == __swab32(LNET_PROTO_MAGIC)) goto failed; @@ -2200,7 +2289,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) goto failed; rc = kiblnd_unpack_msg(reqmsg, priv_nob); - if (rc != 0) { + if (rc) { CERROR("Can't parse connection request: %d\n", rc); goto failed; } @@ -2208,17 +2297,17 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) nid = reqmsg->ibm_srcnid; ni = lnet_net2ni(LNET_NIDNET(reqmsg->ibm_dstnid)); - if (ni != NULL) { + if (ni) { net = (kib_net_t *)ni->ni_data; rej.ibr_incarnation = net->ibn_incarnation; } - if (ni == NULL || /* no matching net */ + if (!ni || /* no matching net */ ni->ni_nid != reqmsg->ibm_dstnid || /* right NET, wrong NID! */ net->ibn_dev != ibdev) { /* wrong device */ - CERROR("Can't accept %s on %s (%s:%d:%pI4h): bad dst nid %s\n", + CERROR("Can't accept conn from %s on %s (%s:%d:%pI4h): bad dst nid %s\n", libcfs_nid2str(nid), - ni == NULL ? "NA" : libcfs_nid2str(ni->ni_nid), + !ni ? 
"NA" : libcfs_nid2str(ni->ni_nid), ibdev->ibd_ifname, ibdev->ibd_nnets, &ibdev->ibd_ifip, libcfs_nid2str(reqmsg->ibm_dstnid)); @@ -2227,7 +2316,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) } /* check time stamp as soon as possible */ - if (reqmsg->ibm_dststamp != 0 && + if (reqmsg->ibm_dststamp && reqmsg->ibm_dststamp != net->ibn_incarnation) { CWARN("Stale connection request\n"); rej.ibr_why = IBLND_REJECT_CONN_STALE; @@ -2243,10 +2332,11 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) goto failed; } - if (reqmsg->ibm_u.connparams.ibcp_queue_depth != + if (reqmsg->ibm_u.connparams.ibcp_queue_depth > IBLND_MSG_QUEUE_SIZE(version)) { - CERROR("Can't accept %s: incompatible queue depth %d (%d wanted)\n", - libcfs_nid2str(nid), reqmsg->ibm_u.connparams.ibcp_queue_depth, + CERROR("Can't accept conn from %s, queue depth too large: %d (<=%d wanted)\n", + libcfs_nid2str(nid), + reqmsg->ibm_u.connparams.ibcp_queue_depth, IBLND_MSG_QUEUE_SIZE(version)); if (version == IBLND_MSG_VERSION) @@ -2255,18 +2345,28 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) goto failed; } - if (reqmsg->ibm_u.connparams.ibcp_max_frags != + if (reqmsg->ibm_u.connparams.ibcp_max_frags > IBLND_RDMA_FRAGS(version)) { - CERROR("Can't accept %s(version %x): incompatible max_frags %d (%d wanted)\n", - libcfs_nid2str(nid), version, - reqmsg->ibm_u.connparams.ibcp_max_frags, - IBLND_RDMA_FRAGS(version)); + CWARN("Can't accept conn from %s (version %x): max_frags %d too large (%d wanted)\n", + libcfs_nid2str(nid), version, + reqmsg->ibm_u.connparams.ibcp_max_frags, + IBLND_RDMA_FRAGS(version)); - if (version == IBLND_MSG_VERSION) + if (version >= IBLND_MSG_VERSION) rej.ibr_why = IBLND_REJECT_RDMA_FRAGS; goto failed; + } else if (reqmsg->ibm_u.connparams.ibcp_max_frags < + IBLND_RDMA_FRAGS(version) && !net->ibn_fmr_ps) { + CWARN("Can't accept conn from %s (version %x): max_frags %d incompatible without FMR pool (%d wanted)\n", + libcfs_nid2str(nid), version, + reqmsg->ibm_u.connparams.ibcp_max_frags, + IBLND_RDMA_FRAGS(version)); + + if (version >= IBLND_MSG_VERSION) + rej.ibr_why = IBLND_REJECT_RDMA_FRAGS; + goto failed; } if (reqmsg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) { @@ -2279,17 +2379,21 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) /* assume 'nid' is a new peer; create */ rc = kiblnd_create_peer(ni, &peer, nid); - if (rc != 0) { + if (rc) { CERROR("Can't create peer for %s\n", libcfs_nid2str(nid)); rej.ibr_why = IBLND_REJECT_NO_RESOURCES; goto failed; } + /* We have validated the peer's parameters so use those */ + peer->ibp_max_frags = reqmsg->ibm_u.connparams.ibcp_max_frags; + peer->ibp_queue_depth = reqmsg->ibm_u.connparams.ibcp_queue_depth; + write_lock_irqsave(g_lock, flags); peer2 = kiblnd_find_peer_locked(nid); - if (peer2 != NULL) { - if (peer2->ibp_version == 0) { + if (peer2) { + if (!peer2->ibp_version) { peer2->ibp_version = version; peer2->ibp_incarnation = reqmsg->ibm_srcstamp; } @@ -2298,10 +2402,16 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) if (peer2->ibp_incarnation != reqmsg->ibm_srcstamp || peer2->ibp_version != version) { kiblnd_close_peer_conns_locked(peer2, -ESTALE); + + if (kiblnd_peer_active(peer2)) { + peer2->ibp_incarnation = reqmsg->ibm_srcstamp; + peer2->ibp_version = version; + } write_unlock_irqrestore(g_lock, flags); - CWARN("Conn stale %s [old ver: %x, new ver: %x]\n", - libcfs_nid2str(nid), peer2->ibp_version, version); 
+ CWARN("Conn stale %s version %x/%x incarnation %llu/%llu\n",
+       libcfs_nid2str(nid), peer2->ibp_version, version,
+       peer2->ibp_incarnation, reqmsg->ibm_srcstamp);

 			kiblnd_peer_decref(peer);
 			rej.ibr_why = IBLND_REJECT_CONN_STALE;
@@ -2309,7 +2419,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 		}

 		/* tie-break connection race in favour of the higher NID */
-		if (peer2->ibp_connecting != 0 &&
+		if (peer2->ibp_connecting &&
 		    nid < ni->ni_nid) {
 			write_unlock_irqrestore(g_lock, flags);
@@ -2320,24 +2430,37 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 			goto failed;
 		}

+		/**
+		 * passive connection is allowed even if this peer is waiting
+		 * for reconnection.
+		 */
+		peer2->ibp_reconnecting = 0;
 		peer2->ibp_accepting++;
 		kiblnd_peer_addref(peer2);

+		/**
+		 * Race with kiblnd_launch_tx (active connect) to create peer,
+		 * so copy validated parameters since we now know what the
+		 * peer's limits are
+		 */
+		peer2->ibp_max_frags = peer->ibp_max_frags;
+		peer2->ibp_queue_depth = peer->ibp_queue_depth;
+
 		write_unlock_irqrestore(g_lock, flags);
 		kiblnd_peer_decref(peer);
 		peer = peer2;
 	} else {
 		/* Brand new peer */
-		LASSERT(peer->ibp_accepting == 0);
-		LASSERT(peer->ibp_version == 0 &&
-			peer->ibp_incarnation == 0);
+		LASSERT(!peer->ibp_accepting);
+		LASSERT(!peer->ibp_version &&
+			!peer->ibp_incarnation);

 		peer->ibp_accepting = 1;
 		peer->ibp_version = version;
 		peer->ibp_incarnation = reqmsg->ibm_srcstamp;

 		/* I have a ref on ni that prevents it being shutdown */
-		LASSERT(net->ibn_shutdown == 0);
+		LASSERT(!net->ibn_shutdown);

 		kiblnd_peer_addref(peer);
 		list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid));
@@ -2345,31 +2468,33 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 		write_unlock_irqrestore(g_lock, flags);
 	}

-	conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_PASSIVE_WAIT, version);
-	if (conn == NULL) {
+	conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_PASSIVE_WAIT,
+				  version);
+	if (!conn) {
 		kiblnd_peer_connect_failed(peer, 0, -ENOMEM);
 		kiblnd_peer_decref(peer);
 		rej.ibr_why = IBLND_REJECT_NO_RESOURCES;
 		goto failed;
 	}

-	/* conn now "owns" cmid, so I return success from here on to ensure the
-	 * CM callback doesn't destroy cmid. */
-
+	/*
+	 * conn now "owns" cmid, so I return success from here on to ensure the
+	 * CM callback doesn't destroy cmid.
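	 *
	 * (A sketch of the rule relied on here, from generic RDMA CM
	 * behaviour rather than anything added by this patch: the CM
	 * treats a non-zero return from its event callback as a request
	 * to destroy the id, roughly
	 *	rc = id->event_handler(id, &event);
	 *	if (rc)
	 *		rdma_destroy_id(id);
	 * so once conn owns cmid, every later error path must record the
	 * failure on the conn and still return 0.)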
+ */ conn->ibc_incarnation = reqmsg->ibm_srcstamp; - conn->ibc_credits = IBLND_MSG_QUEUE_SIZE(version); - conn->ibc_reserved_credits = IBLND_MSG_QUEUE_SIZE(version); - LASSERT(conn->ibc_credits + conn->ibc_reserved_credits + IBLND_OOB_MSGS(version) - <= IBLND_RX_MSGS(version)); + conn->ibc_credits = conn->ibc_queue_depth; + conn->ibc_reserved_credits = conn->ibc_queue_depth; + LASSERT(conn->ibc_credits + conn->ibc_reserved_credits + + IBLND_OOB_MSGS(version) <= IBLND_RX_MSGS(conn)); ackmsg = &conn->ibc_connvars->cv_msg; memset(ackmsg, 0, sizeof(*ackmsg)); kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK, sizeof(ackmsg->ibm_u.connparams)); - ackmsg->ibm_u.connparams.ibcp_queue_depth = IBLND_MSG_QUEUE_SIZE(version); + ackmsg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth; + ackmsg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags; ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE; - ackmsg->ibm_u.connparams.ibcp_max_frags = IBLND_RDMA_FRAGS(version); kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp); @@ -2385,7 +2510,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) CDEBUG(D_NET, "Accept %s\n", libcfs_nid2str(nid)); rc = rdma_accept(cmid, &cp); - if (rc != 0) { + if (rc) { CERROR("Can't accept %s: %d\n", libcfs_nid2str(nid), rc); rej.ibr_version = version; rej.ibr_why = IBLND_REJECT_FATAL; @@ -2399,7 +2524,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) return 0; failed: - if (ni != NULL) + if (ni) lnet_ni_decref(ni); rej.ibr_version = version; @@ -2411,45 +2536,82 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) } static void -kiblnd_reconnect(kib_conn_t *conn, int version, - __u64 incarnation, int why, kib_connparams_t *cp) +kiblnd_check_reconnect(kib_conn_t *conn, int version, + __u64 incarnation, int why, kib_connparams_t *cp) { + rwlock_t *glock = &kiblnd_data.kib_global_lock; kib_peer_t *peer = conn->ibc_peer; char *reason; - int retry = 0; + int msg_size = IBLND_MSG_SIZE; + int frag_num = -1; + int queue_dep = -1; + bool reconnect; unsigned long flags; LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT); LASSERT(peer->ibp_connecting > 0); /* 'conn' at least */ + LASSERT(!peer->ibp_reconnecting); - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); + if (cp) { + msg_size = cp->ibcp_max_msg_size; + frag_num = cp->ibcp_max_frags; + queue_dep = cp->ibcp_queue_depth; + } - /* retry connection if it's still needed and no other connection + write_lock_irqsave(glock, flags); + /** + * retry connection if it's still needed and no other connection * attempts (active or passive) are in progress * NB: reconnect is still needed even when ibp_tx_queue is * empty if ibp_version != version because reconnect may be - * initiated by kiblnd_query() */ - if ((!list_empty(&peer->ibp_tx_queue) || - peer->ibp_version != version) && - peer->ibp_connecting == 1 && - peer->ibp_accepting == 0) { - retry = 1; - peer->ibp_connecting++; - - peer->ibp_version = version; - peer->ibp_incarnation = incarnation; + * initiated by kiblnd_query() + */ + reconnect = (!list_empty(&peer->ibp_tx_queue) || + peer->ibp_version != version) && + peer->ibp_connecting == 1 && + !peer->ibp_accepting; + if (!reconnect) { + reason = "no need"; + goto out; } - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - if (!retry) - return; - switch (why) { default: reason = "Unknown"; break; + case IBLND_REJECT_RDMA_FRAGS: + if (!cp) { + reason = "can't negotiate max frags"; + goto out; + } + if 
(!*kiblnd_tunables.kib_map_on_demand) { + reason = "map_on_demand must be enabled"; + goto out; + } + if (conn->ibc_max_frags <= frag_num) { + reason = "unsupported max frags"; + goto out; + } + + peer->ibp_max_frags = frag_num; + reason = "rdma fragments"; + break; + + case IBLND_REJECT_MSG_QUEUE_SIZE: + if (!cp) { + reason = "can't negotiate queue depth"; + goto out; + } + if (conn->ibc_queue_depth <= queue_dep) { + reason = "unsupported queue depth"; + goto out; + } + + peer->ibp_queue_depth = queue_dep; + reason = "queue depth"; + break; + case IBLND_REJECT_CONN_STALE: reason = "stale"; break; @@ -2463,14 +2625,24 @@ kiblnd_reconnect(kib_conn_t *conn, int version, break; } - CNETERR("%s: retrying (%s), %x, %x, queue_dep: %d, max_frag: %d, msg_size: %d\n", - libcfs_nid2str(peer->ibp_nid), - reason, IBLND_MSG_VERSION, version, - cp != NULL ? cp->ibcp_queue_depth : IBLND_MSG_QUEUE_SIZE(version), - cp != NULL ? cp->ibcp_max_frags : IBLND_RDMA_FRAGS(version), - cp != NULL ? cp->ibcp_max_msg_size : IBLND_MSG_SIZE); + conn->ibc_reconnect = 1; + peer->ibp_reconnecting = 1; + peer->ibp_version = version; + if (incarnation) + peer->ibp_incarnation = incarnation; +out: + write_unlock_irqrestore(glock, flags); - kiblnd_connect_peer(peer); + CNETERR("%s: %s (%s), %x, %x, msg_size: %d, queue_depth: %d/%d, max_frags: %d/%d\n", + libcfs_nid2str(peer->ibp_nid), + reconnect ? "reconnect" : "don't reconnect", + reason, IBLND_MSG_VERSION, version, msg_size, + conn->ibc_queue_depth, queue_dep, + conn->ibc_max_frags, frag_num); + /** + * if conn::ibc_reconnect is TRUE, connd will reconnect to the peer + * while destroying the zombie + */ } static void @@ -2483,8 +2655,8 @@ kiblnd_rejected(kib_conn_t *conn, int reason, void *priv, int priv_nob) switch (reason) { case IB_CM_REJ_STALE_CONN: - kiblnd_reconnect(conn, IBLND_MSG_VERSION, 0, - IBLND_REJECT_CONN_STALE, NULL); + kiblnd_check_reconnect(conn, IBLND_MSG_VERSION, 0, + IBLND_REJECT_CONN_STALE, NULL); break; case IB_CM_REJ_INVALID_SERVICE_ID: @@ -2521,9 +2693,11 @@ kiblnd_rejected(kib_conn_t *conn, int reason, void *priv, int priv_nob) if (priv_nob >= sizeof(kib_rej_t) && rej->ibr_version > IBLND_MSG_VERSION_1) { - /* priv_nob is always 148 in current version + /* + * priv_nob is always 148 in current version * of OFED, so we still need to check version. - * (define of IB_CM_REJ_PRIVATE_DATA_SIZE) */ + * (define of IB_CM_REJ_PRIVATE_DATA_SIZE) + */ cp = &rej->ibr_cp; if (flip) { @@ -2564,24 +2738,11 @@ kiblnd_rejected(kib_conn_t *conn, int reason, void *priv, int priv_nob) case IBLND_REJECT_CONN_RACE: case IBLND_REJECT_CONN_STALE: case IBLND_REJECT_CONN_UNCOMPAT: - kiblnd_reconnect(conn, rej->ibr_version, - incarnation, rej->ibr_why, cp); - break; - case IBLND_REJECT_MSG_QUEUE_SIZE: - CERROR("%s rejected: incompatible message queue depth %d, %d\n", - libcfs_nid2str(peer->ibp_nid), - cp != NULL ? cp->ibcp_queue_depth : - IBLND_MSG_QUEUE_SIZE(rej->ibr_version), - IBLND_MSG_QUEUE_SIZE(conn->ibc_version)); - break; - case IBLND_REJECT_RDMA_FRAGS: - CERROR("%s rejected: incompatible # of RDMA fragments %d, %d\n", - libcfs_nid2str(peer->ibp_nid), - cp != NULL ? 
cp->ibcp_max_frags : - IBLND_RDMA_FRAGS(rej->ibr_version), - IBLND_RDMA_FRAGS(conn->ibc_version)); + kiblnd_check_reconnect(conn, rej->ibr_version, + incarnation, + rej->ibr_why, cp); break; case IBLND_REJECT_NO_RESOURCES: @@ -2623,9 +2784,9 @@ kiblnd_check_connreply(kib_conn_t *conn, void *priv, int priv_nob) int rc = kiblnd_unpack_msg(msg, priv_nob); unsigned long flags; - LASSERT(net != NULL); + LASSERT(net); - if (rc != 0) { + if (rc) { CERROR("Can't unpack connack from %s: %d\n", libcfs_nid2str(peer->ibp_nid), rc); goto failed; @@ -2645,22 +2806,22 @@ kiblnd_check_connreply(kib_conn_t *conn, void *priv, int priv_nob) goto failed; } - if (msg->ibm_u.connparams.ibcp_queue_depth != - IBLND_MSG_QUEUE_SIZE(ver)) { - CERROR("%s has incompatible queue depth %d(%d wanted)\n", + if (msg->ibm_u.connparams.ibcp_queue_depth > + conn->ibc_queue_depth) { + CERROR("%s has incompatible queue depth %d (<=%d wanted)\n", libcfs_nid2str(peer->ibp_nid), msg->ibm_u.connparams.ibcp_queue_depth, - IBLND_MSG_QUEUE_SIZE(ver)); + conn->ibc_queue_depth); rc = -EPROTO; goto failed; } - if (msg->ibm_u.connparams.ibcp_max_frags != - IBLND_RDMA_FRAGS(ver)) { - CERROR("%s has incompatible max_frags %d (%d wanted)\n", + if (msg->ibm_u.connparams.ibcp_max_frags > + conn->ibc_max_frags) { + CERROR("%s has incompatible max_frags %d (<=%d wanted)\n", libcfs_nid2str(peer->ibp_nid), msg->ibm_u.connparams.ibcp_max_frags, - IBLND_RDMA_FRAGS(ver)); + conn->ibc_max_frags); rc = -EPROTO; goto failed; } @@ -2682,7 +2843,7 @@ kiblnd_check_connreply(kib_conn_t *conn, void *priv, int priv_nob) rc = -ESTALE; read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - if (rc != 0) { + if (rc) { CERROR("Bad connection reply from %s, rc = %d, version: %x max_frags: %d\n", libcfs_nid2str(peer->ibp_nid), rc, msg->ibm_version, msg->ibm_u.connparams.ibcp_max_frags); @@ -2690,21 +2851,24 @@ kiblnd_check_connreply(kib_conn_t *conn, void *priv, int priv_nob) } conn->ibc_incarnation = msg->ibm_srcstamp; - conn->ibc_credits = - conn->ibc_reserved_credits = IBLND_MSG_QUEUE_SIZE(ver); - LASSERT(conn->ibc_credits + conn->ibc_reserved_credits + IBLND_OOB_MSGS(ver) - <= IBLND_RX_MSGS(ver)); + conn->ibc_credits = msg->ibm_u.connparams.ibcp_queue_depth; + conn->ibc_reserved_credits = msg->ibm_u.connparams.ibcp_queue_depth; + conn->ibc_queue_depth = msg->ibm_u.connparams.ibcp_queue_depth; + conn->ibc_max_frags = msg->ibm_u.connparams.ibcp_max_frags; + LASSERT(conn->ibc_credits + conn->ibc_reserved_credits + + IBLND_OOB_MSGS(ver) <= IBLND_RX_MSGS(conn)); kiblnd_connreq_done(conn, 0); return; failed: - /* NB My QP has already established itself, so I handle anything going + /* + * NB My QP has already established itself, so I handle anything going * wrong here by setting ibc_comms_error. * kiblnd_connreq_done(0) moves the conn state to ESTABLISHED, but then - * immediately tears it down. */ - - LASSERT(rc != 0); + * immediately tears it down. + */ + LASSERT(rc); conn->ibc_comms_error = rc; kiblnd_connreq_done(conn, 0); } @@ -2724,28 +2888,30 @@ kiblnd_active_connect(struct rdma_cm_id *cmid) read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); incarnation = peer->ibp_incarnation; - version = (peer->ibp_version == 0) ? IBLND_MSG_VERSION : - peer->ibp_version; + version = !peer->ibp_version ? 
IBLND_MSG_VERSION : + peer->ibp_version; read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_ACTIVE_CONNECT, version); - if (conn == NULL) { + conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_ACTIVE_CONNECT, + version); + if (!conn) { kiblnd_peer_connect_failed(peer, 1, -ENOMEM); kiblnd_peer_decref(peer); /* lose cmid's ref */ return -ENOMEM; } - /* conn "owns" cmid now, so I return success from here on to ensure the + /* + * conn "owns" cmid now, so I return success from here on to ensure the * CM callback doesn't destroy cmid. conn also takes over cmid's ref - * on peer */ - + * on peer + */ msg = &conn->ibc_connvars->cv_msg; memset(msg, 0, sizeof(*msg)); kiblnd_init_msg(msg, IBLND_MSG_CONNREQ, sizeof(msg->ibm_u.connparams)); - msg->ibm_u.connparams.ibcp_queue_depth = IBLND_MSG_QUEUE_SIZE(version); - msg->ibm_u.connparams.ibcp_max_frags = IBLND_RDMA_FRAGS(version); + msg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth; + msg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags; msg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE; kiblnd_pack_msg(peer->ibp_ni, msg, version, @@ -2764,7 +2930,7 @@ kiblnd_active_connect(struct rdma_cm_id *cmid) LASSERT(conn->ibc_cmid == cmid); rc = rdma_connect(cmid, &cp); - if (rc != 0) { + if (rc) { CERROR("Can't connect to %s: %d\n", libcfs_nid2str(peer->ibp_nid), rc); kiblnd_connreq_done(conn, rc); @@ -2798,10 +2964,10 @@ kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event) case RDMA_CM_EVENT_ADDR_ERROR: peer = (kib_peer_t *)cmid->context; CNETERR("%s: ADDR ERROR %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); + libcfs_nid2str(peer->ibp_nid), event->status); kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH); kiblnd_peer_decref(peer); - return -EHOSTUNREACH; /* rc != 0 destroys cmid */ + return -EHOSTUNREACH; /* rc destroys cmid */ case RDMA_CM_EVENT_ADDR_RESOLVED: peer = (kib_peer_t *)cmid->context; @@ -2809,14 +2975,14 @@ kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event) CDEBUG(D_NET, "%s Addr resolved: %d\n", libcfs_nid2str(peer->ibp_nid), event->status); - if (event->status != 0) { + if (event->status) { CNETERR("Can't resolve address for %s: %d\n", libcfs_nid2str(peer->ibp_nid), event->status); rc = event->status; } else { rc = rdma_resolve_route( cmid, *kiblnd_tunables.kib_timeout * 1000); - if (rc == 0) + if (!rc) return 0; /* Can't initiate route resolution */ CERROR("Can't resolve route for %s: %d\n", @@ -2824,7 +2990,7 @@ kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event) } kiblnd_peer_connect_failed(peer, 1, rc); kiblnd_peer_decref(peer); - return rc; /* rc != 0 destroys cmid */ + return rc; /* rc destroys cmid */ case RDMA_CM_EVENT_ROUTE_ERROR: peer = (kib_peer_t *)cmid->context; @@ -2832,28 +2998,28 @@ kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event) libcfs_nid2str(peer->ibp_nid), event->status); kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH); kiblnd_peer_decref(peer); - return -EHOSTUNREACH; /* rc != 0 destroys cmid */ + return -EHOSTUNREACH; /* rc destroys cmid */ case RDMA_CM_EVENT_ROUTE_RESOLVED: peer = (kib_peer_t *)cmid->context; CDEBUG(D_NET, "%s Route resolved: %d\n", libcfs_nid2str(peer->ibp_nid), event->status); - if (event->status == 0) + if (!event->status) return kiblnd_active_connect(cmid); CNETERR("Can't resolve route for %s: %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); + libcfs_nid2str(peer->ibp_nid), event->status); 
kiblnd_peer_connect_failed(peer, 1, event->status); kiblnd_peer_decref(peer); - return event->status; /* rc != 0 destroys cmid */ + return event->status; /* rc destroys cmid */ case RDMA_CM_EVENT_UNREACHABLE: conn = (kib_conn_t *)cmid->context; LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT || conn->ibc_state == IBLND_CONN_PASSIVE_WAIT); CNETERR("%s: UNREACHABLE %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status); + libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status); kiblnd_connreq_done(conn, -ENETDOWN); kiblnd_conn_decref(conn); return 0; @@ -2876,8 +3042,8 @@ kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event) case IBLND_CONN_PASSIVE_WAIT: CERROR("%s: REJECTED %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - event->status); + libcfs_nid2str(conn->ibc_peer->ibp_nid), + event->status); kiblnd_connreq_done(conn, -ECONNRESET); break; @@ -2933,8 +3099,10 @@ kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event) LCONSOLE_ERROR_MSG(0x131, "Received notification of device removal\n" "Please shutdown LNET to allow this to proceed\n"); - /* Can't remove network from underneath LNET for now, so I have - * to ignore this */ + /* + * Can't remove network from underneath LNET for now, so I have + * to ignore this + */ return 0; case RDMA_CM_EVENT_ADDR_CHANGE: @@ -2956,7 +3124,7 @@ kiblnd_check_txs_locked(kib_conn_t *conn, struct list_head *txs) LASSERT(tx->tx_queued); } else { LASSERT(!tx->tx_queued); - LASSERT(tx->tx_waiting || tx->tx_sending != 0); + LASSERT(tx->tx_waiting || tx->tx_sending); } if (cfs_time_aftereq(jiffies, tx->tx_deadline)) { @@ -2989,13 +3157,16 @@ kiblnd_check_conns(int idx) struct list_head *ptmp; kib_peer_t *peer; kib_conn_t *conn; + kib_conn_t *temp; kib_conn_t *tmp; struct list_head *ctmp; unsigned long flags; - /* NB. We expect to have a look at all the peers and not find any + /* + * NB. We expect to have a look at all the peers and not find any * RDMAs to time out, so we just use a shared lock while we - * take a look... */ + * take a look... + */ read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); list_for_each(ptmp, peers) { @@ -3028,8 +3199,7 @@ kiblnd_check_conns(int idx) conn->ibc_reserved_credits); list_add(&conn->ibc_connd_list, &closes); } else { - list_add(&conn->ibc_connd_list, - &checksends); + list_add(&conn->ibc_connd_list, &checksends); } /* +ref for 'closes' or 'checksends' */ kiblnd_conn_addref(conn); @@ -3040,21 +3210,23 @@ kiblnd_check_conns(int idx) read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - /* Handle timeout by closing the whole + /* + * Handle timeout by closing the whole * connection. We can only be sure RDMA activity - * has ceased once the QP has been modified. */ + * has ceased once the QP has been modified. + */ list_for_each_entry_safe(conn, tmp, &closes, ibc_connd_list) { list_del(&conn->ibc_connd_list); kiblnd_close_conn(conn, -ETIMEDOUT); kiblnd_conn_decref(conn); } - /* In case we have enough credits to return via a + /* + * In case we have enough credits to return via a * NOOP, but there were no non-blocking tx descs - * free to do it last time... */ - while (!list_empty(&checksends)) { - conn = list_entry(checksends.next, - kib_conn_t, ibc_connd_list); + * free to do it last time... 
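		 *
		 * (e.g. a peer may have returned credits while every
		 * non-blocking tx descriptor was in use; calling
		 * kiblnd_check_sends() on each conn in 'checksends'
		 * re-runs the send path so a NOOP can now carry those
		 * credits back.)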
+	 */
+	list_for_each_entry_safe(conn, temp, &checksends, ibc_connd_list) {
 		list_del(&conn->ibc_connd_list);
 		kiblnd_check_sends(conn);
 		kiblnd_conn_decref(conn);
@@ -3074,9 +3246,21 @@ kiblnd_disconnect_conn(kib_conn_t *conn)
 	kiblnd_peer_notify(conn->ibc_peer);
 }

+/**
+ * High-water for reconnection to the same peer; a reconnection attempt
+ * should be delayed after trying more than KIB_RECONN_HIGH_RACE times.
+ */
+#define KIB_RECONN_HIGH_RACE 10
+/**
+ * Allow connd to take a break and handle other things after consecutive
+ * reconnection attempts.
+ */
+#define KIB_RECONN_BREAK 100
+
 int
 kiblnd_connd(void *arg)
 {
+	spinlock_t *lock = &kiblnd_data.kib_connd_lock;
 	wait_queue_t wait;
 	unsigned long flags;
 	kib_conn_t *conn;
@@ -3091,39 +3275,79 @@ kiblnd_connd(void *arg)
 	init_waitqueue_entry(&wait, current);
 	kiblnd_data.kib_connd = current;

-	spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
+	spin_lock_irqsave(lock, flags);

 	while (!kiblnd_data.kib_shutdown) {
+		int reconn = 0;
+
 		dropped_lock = 0;

 		if (!list_empty(&kiblnd_data.kib_connd_zombies)) {
+			kib_peer_t *peer = NULL;
+
 			conn = list_entry(kiblnd_data.kib_connd_zombies.next,
-					  kib_conn_t, ibc_list);
+					  kib_conn_t, ibc_list);
 			list_del(&conn->ibc_list);
+			if (conn->ibc_reconnect) {
+				peer = conn->ibc_peer;
+				kiblnd_peer_addref(peer);
+			}

-			spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock,
-					       flags);
+			spin_unlock_irqrestore(lock, flags);
 			dropped_lock = 1;

-			kiblnd_destroy_conn(conn);
+			kiblnd_destroy_conn(conn, !peer);
+
+			spin_lock_irqsave(lock, flags);
+			if (!peer)
+				continue;

-			spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
+			conn->ibc_peer = peer;
+			if (peer->ibp_reconnected < KIB_RECONN_HIGH_RACE)
+				list_add_tail(&conn->ibc_list,
+					      &kiblnd_data.kib_reconn_list);
+			else
+				list_add_tail(&conn->ibc_list,
+					      &kiblnd_data.kib_reconn_wait);
 		}

 		if (!list_empty(&kiblnd_data.kib_connd_conns)) {
 			conn = list_entry(kiblnd_data.kib_connd_conns.next,
-					  kib_conn_t, ibc_list);
+					  kib_conn_t, ibc_list);
 			list_del(&conn->ibc_list);

-			spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock,
-					       flags);
+			spin_unlock_irqrestore(lock, flags);
 			dropped_lock = 1;

 			kiblnd_disconnect_conn(conn);
 			kiblnd_conn_decref(conn);

-			spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
+			spin_lock_irqsave(lock, flags);
+		}
+
+		while (reconn < KIB_RECONN_BREAK) {
+			if (kiblnd_data.kib_reconn_sec !=
+			    ktime_get_real_seconds()) {
+				kiblnd_data.kib_reconn_sec = ktime_get_real_seconds();
+				list_splice_init(&kiblnd_data.kib_reconn_wait,
+						 &kiblnd_data.kib_reconn_list);
+			}
+
+			if (list_empty(&kiblnd_data.kib_reconn_list))
+				break;
+
+			conn = list_entry(kiblnd_data.kib_reconn_list.next,
+					  kib_conn_t, ibc_list);
+			list_del(&conn->ibc_list);
+
+			spin_unlock_irqrestore(lock, flags);
+			dropped_lock = 1;
+
+			reconn += kiblnd_reconnect_peer(conn->ibc_peer);
+			kiblnd_peer_decref(conn->ibc_peer);
+			LIBCFS_FREE(conn, sizeof(*conn));
+
+			spin_lock_irqsave(lock, flags);
 		}

 		/* careful with the jiffy wrap... */
@@ -3133,21 +3357,22 @@ kiblnd_connd(void *arg)
 			const int p = 1;
 			int chunk = kiblnd_data.kib_peer_hash_size;

-			spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);
+			spin_unlock_irqrestore(lock, flags);
 			dropped_lock = 1;

-			/* Time to check for RDMA timeouts on a few more
+			/*
+			 * Time to check for RDMA timeouts on a few more
 			 * peers: I do checks every 'p' seconds on a
 			 * proportion of the peer table and I need to check
 			 * every connection 'n' times within a timeout
 			 * interval, to ensure I detect a timeout on any
 			 * connection within (n+1)/n times the timeout
-			 * interval.
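			 *
			 * (Worked example under assumed defaults, not taken
			 * from the patch itself: with timeout = 50s, p = 1,
			 * n = 4 and a peer hash table of 101 buckets,
			 *	chunk = 101 * 4 * 1 / 50 = 8
			 * buckets are scanned per one-second pass, so the
			 * whole table is covered roughly n times per timeout
			 * interval.)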
*/ - + * interval. + */ if (*kiblnd_tunables.kib_timeout > n * p) chunk = (chunk * n * p) / *kiblnd_tunables.kib_timeout; - if (chunk == 0) + if (!chunk) chunk = 1; for (i = 0; i < chunk; i++) { @@ -3156,8 +3381,8 @@ kiblnd_connd(void *arg) kiblnd_data.kib_peer_hash_size; } - deadline += p * HZ; - spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags); + deadline += msecs_to_jiffies(p * MSEC_PER_SEC); + spin_lock_irqsave(lock, flags); } if (dropped_lock) @@ -3166,15 +3391,15 @@ kiblnd_connd(void *arg) /* Nothing to do for 'timeout' */ set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&kiblnd_data.kib_connd_waitq, &wait); - spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags); + spin_unlock_irqrestore(lock, flags); schedule_timeout(timeout); remove_wait_queue(&kiblnd_data.kib_connd_waitq, &wait); - spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags); + spin_lock_irqsave(lock, flags); } - spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags); + spin_unlock_irqrestore(lock, flags); kiblnd_thread_fini(); return 0; @@ -3206,12 +3431,14 @@ kiblnd_complete(struct ib_wc *wc) LBUG(); case IBLND_WID_RDMA: - /* We only get RDMA completion notification if it fails. All + /* + * We only get RDMA completion notification if it fails. All * subsequent work items, including the final SEND will fail * too. However we can't print out any more info about the * failing RDMA because 'tx' might be back on the idle list or * even reused already if we didn't manage to post all our work - * items */ + * items + */ CNETERR("RDMA (tx: %p) failed: %d\n", kiblnd_wreqid2ptr(wc->wr_id), wc->status); return; @@ -3230,11 +3457,13 @@ kiblnd_complete(struct ib_wc *wc) void kiblnd_cq_completion(struct ib_cq *cq, void *arg) { - /* NB I'm not allowed to schedule this conn once its refcount has + /* + * NB I'm not allowed to schedule this conn once its refcount has * reached 0. Since fundamentally I'm racing with scheduler threads * consuming my CQ I could be called after all completions have - * occurred. But in this case, ibc_nrx == 0 && ibc_nsends_posted == 0 - * and this CQ is about to be destroyed so I NOOP. */ + * occurred. But in this case, !ibc_nrx && !ibc_nsends_posted + * and this CQ is about to be destroyed so I NOOP. + */ kib_conn_t *conn = arg; struct kib_sched_info *sched = conn->ibc_sched; unsigned long flags; @@ -3288,7 +3517,7 @@ kiblnd_scheduler(void *arg) sched = kiblnd_data.kib_scheds[KIB_THREAD_CPT(id)]; rc = cfs_cpt_bind(lnet_cpt_table(), sched->ibs_cpt); - if (rc != 0) { + if (rc) { CWARN("Failed to bind on CPT %d, please verify whether all CPUs are healthy and reload modules if necessary, otherwise your system might under risk of low performance\n", sched->ibs_cpt); } @@ -3308,8 +3537,8 @@ kiblnd_scheduler(void *arg) did_something = 0; if (!list_empty(&sched->ibs_conns)) { - conn = list_entry(sched->ibs_conns.next, - kib_conn_t, ibc_sched_list); + conn = list_entry(sched->ibs_conns.next, kib_conn_t, + ibc_sched_list); /* take over kib_sched_conns' ref on conn... 
*/ LASSERT(conn->ibc_scheduled); list_del(&conn->ibc_sched_list); @@ -3317,8 +3546,10 @@ kiblnd_scheduler(void *arg) spin_unlock_irqrestore(&sched->ibs_lock, flags); + wc.wr_id = IBLND_WID_INVAL; + rc = ib_poll_cq(conn->ibc_cq, 1, &wc); - if (rc == 0) { + if (!rc) { rc = ib_req_notify_cq(conn->ibc_cq, IB_CQ_NEXT_COMP); if (rc < 0) { @@ -3327,13 +3558,22 @@ kiblnd_scheduler(void *arg) kiblnd_close_conn(conn, -EIO); kiblnd_conn_decref(conn); spin_lock_irqsave(&sched->ibs_lock, - flags); + flags); continue; } rc = ib_poll_cq(conn->ibc_cq, 1, &wc); } + if (unlikely(rc > 0 && wc.wr_id == IBLND_WID_INVAL)) { + LCONSOLE_ERROR("ib_poll_cq (rc: %d) returned invalid wr_id, opcode %d, status: %d, vendor_err: %d, conn: %s status: %d\nplease upgrade firmware and OFED or contact vendor.\n", + rc, wc.opcode, wc.status, + wc.vendor_err, + libcfs_nid2str(conn->ibc_peer->ibp_nid), + conn->ibc_state); + rc = -EINVAL; + } + if (rc < 0) { CWARN("%s: ib_poll_cq failed: %d, closing connection\n", libcfs_nid2str(conn->ibc_peer->ibp_nid), @@ -3346,21 +3586,23 @@ kiblnd_scheduler(void *arg) spin_lock_irqsave(&sched->ibs_lock, flags); - if (rc != 0 || conn->ibc_ready) { - /* There may be another completion waiting; get + if (rc || conn->ibc_ready) { + /* + * There may be another completion waiting; get * another scheduler to check while I handle - * this one... */ + * this one... + */ /* +1 ref for sched_conns */ kiblnd_conn_addref(conn); list_add_tail(&conn->ibc_sched_list, - &sched->ibs_conns); + &sched->ibs_conns); if (waitqueue_active(&sched->ibs_waitq)) wake_up(&sched->ibs_waitq); } else { conn->ibc_scheduled = 0; } - if (rc != 0) { + if (rc) { spin_unlock_irqrestore(&sched->ibs_lock, flags); kiblnd_complete(&wc); @@ -3400,7 +3642,7 @@ kiblnd_failover_thread(void *arg) unsigned long flags; int rc; - LASSERT(*kiblnd_tunables.kib_dev_failover != 0); + LASSERT(*kiblnd_tunables.kib_dev_failover); cfs_block_allsigs(); @@ -3459,13 +3701,15 @@ kiblnd_failover_thread(void *arg) remove_wait_queue(&kiblnd_data.kib_failover_waitq, &wait); write_lock_irqsave(glock, flags); - if (!long_sleep || rc != 0) + if (!long_sleep || rc) continue; - /* have a long sleep, routine check all active devices, + /* + * have a long sleep, routine check all active devices, * we need checking like this because if there is not active * connection on the dev and no SEND from local, we may listen - * on wrong HCA for ever while there is a bonding failover */ + * on wrong HCA for ever while there is a bonding failover + */ list_for_each_entry(dev, &kiblnd_data.kib_devs, ibd_list) { if (kiblnd_dev_can_failover(dev)) { list_add_tail(&dev->ibd_fail_list, diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c index 1d4e7efb5..b4607dad3 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c @@ -52,8 +52,10 @@ static int timeout = 50; module_param(timeout, int, 0644); MODULE_PARM_DESC(timeout, "timeout (seconds)"); -/* Number of threads in each scheduler pool which is percpt, - * we will estimate reasonable value based on CPUs if it's set to zero. */ +/* + * Number of threads in each scheduler pool which is percpt, + * we will estimate reasonable value based on CPUs if it's set to zero. 
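 * (Illustration, not from this patch: as a module option this would be
 * set at load time, e.g. something like "modprobe ko2iblnd nscheds=4"
 * to pin four threads per scheduler pool, while the default of 0 lets
 * the driver size each pool from the CPUs in its CPT.)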
+ */ static int nscheds; module_param(nscheds, int, 0444); MODULE_PARM_DESC(nscheds, "number of threads in each scheduler pool"); @@ -200,7 +202,7 @@ kiblnd_tunables_init(void) if (*kiblnd_tunables.kib_map_on_demand == 1) *kiblnd_tunables.kib_map_on_demand = 2; /* don't make sense to create map if only one fragment */ - if (*kiblnd_tunables.kib_concurrent_sends == 0) { + if (!*kiblnd_tunables.kib_concurrent_sends) { if (*kiblnd_tunables.kib_map_on_demand > 0 && *kiblnd_tunables.kib_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8) *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits) * 2; diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c index 05aa90ea5..cca7b2f7f 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c @@ -70,7 +70,7 @@ ksocknal_create_route(__u32 ipaddr, int port) ksock_route_t *route; LIBCFS_ALLOC(route, sizeof(*route)); - if (route == NULL) + if (!route) return NULL; atomic_set(&route->ksnr_refcount, 1); @@ -91,9 +91,9 @@ ksocknal_create_route(__u32 ipaddr, int port) void ksocknal_destroy_route(ksock_route_t *route) { - LASSERT(atomic_read(&route->ksnr_refcount) == 0); + LASSERT(!atomic_read(&route->ksnr_refcount)); - if (route->ksnr_peer != NULL) + if (route->ksnr_peer) ksocknal_peer_decref(route->ksnr_peer); LIBCFS_FREE(route, sizeof(*route)); @@ -102,6 +102,7 @@ ksocknal_destroy_route(ksock_route_t *route) static int ksocknal_create_peer(ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id) { + int cpt = lnet_cpt_of_nid(id.nid); ksock_net_t *net = ni->ni_data; ksock_peer_t *peer; @@ -109,8 +110,8 @@ ksocknal_create_peer(ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id) LASSERT(id.pid != LNET_PID_ANY); LASSERT(!in_interrupt()); - LIBCFS_ALLOC(peer, sizeof(*peer)); - if (peer == NULL) + LIBCFS_CPT_ALLOC(peer, lnet_cpt_table(), cpt, sizeof(*peer)); + if (!peer) return -ENOMEM; peer->ksnp_ni = ni; @@ -152,10 +153,10 @@ ksocknal_destroy_peer(ksock_peer_t *peer) ksock_net_t *net = peer->ksnp_ni->ni_data; CDEBUG(D_NET, "peer %s %p deleted\n", - libcfs_id2str(peer->ksnp_id), peer); + libcfs_id2str(peer->ksnp_id), peer); - LASSERT(atomic_read(&peer->ksnp_refcount) == 0); - LASSERT(peer->ksnp_accepting == 0); + LASSERT(!atomic_read(&peer->ksnp_refcount)); + LASSERT(!peer->ksnp_accepting); LASSERT(list_empty(&peer->ksnp_conns)); LASSERT(list_empty(&peer->ksnp_routes)); LASSERT(list_empty(&peer->ksnp_tx_queue)); @@ -163,10 +164,12 @@ ksocknal_destroy_peer(ksock_peer_t *peer) LIBCFS_FREE(peer, sizeof(*peer)); - /* NB a peer's connections and routes keep a reference on their peer + /* + * NB a peer's connections and routes keep a reference on their peer * until they are destroyed, so we can be assured that _all_ state to * do with this peer has been cleaned up when its refcount drops to - * zero. */ + * zero. + */ spin_lock_bh(&net->ksnn_lock); net->ksnn_npeers--; spin_unlock_bh(&net->ksnn_lock); @@ -180,7 +183,6 @@ ksocknal_find_peer_locked(lnet_ni_t *ni, lnet_process_id_t id) ksock_peer_t *peer; list_for_each(tmp, peer_list) { - peer = list_entry(tmp, ksock_peer_t, ksnp_list); LASSERT(!peer->ksnp_closing); @@ -207,7 +209,7 @@ ksocknal_find_peer(lnet_ni_t *ni, lnet_process_id_t id) read_lock(&ksocknal_data.ksnd_global_lock); peer = ksocknal_find_peer_locked(ni, id); - if (peer != NULL) /* +1 ref for caller? */ + if (peer) /* +1 ref for caller? 
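			 * (the pointer returned by
			 * ksocknal_find_peer_locked() is only pinned by
			 * ksnd_global_lock, so the addref here is what keeps
			 * peer alive after the unlock; the caller drops it
			 * with ksocknal_peer_decref() when done.)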
*/ ksocknal_peer_addref(peer); read_unlock(&ksocknal_data.ksnd_global_lock); @@ -226,9 +228,11 @@ ksocknal_unlink_peer_locked(ksock_peer_t *peer) ip = peer->ksnp_passive_ips[i]; iface = ksocknal_ip2iface(peer->ksnp_ni, ip); - /* All IPs in peer->ksnp_passive_ips[] come from the - * interface list, therefore the call must succeed. */ - LASSERT(iface != NULL); + /* + * All IPs in peer->ksnp_passive_ips[] come from the + * interface list, therefore the call must succeed. + */ + LASSERT(iface); CDEBUG(D_NET, "peer=%p iface=%p ksni_nroutes=%d\n", peer, iface, iface->ksni_nroutes); @@ -246,8 +250,8 @@ ksocknal_unlink_peer_locked(ksock_peer_t *peer) static int ksocknal_get_peer_info(lnet_ni_t *ni, int index, - lnet_process_id_t *id, __u32 *myip, __u32 *peer_ip, - int *port, int *conn_count, int *share_count) + lnet_process_id_t *id, __u32 *myip, __u32 *peer_ip, + int *port, int *conn_count, int *share_count) { ksock_peer_t *peer; struct list_head *ptmp; @@ -260,14 +264,13 @@ ksocknal_get_peer_info(lnet_ni_t *ni, int index, read_lock(&ksocknal_data.ksnd_global_lock); for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) { peer = list_entry(ptmp, ksock_peer_t, ksnp_list); if (peer->ksnp_ni != ni) continue; - if (peer->ksnp_n_passive_ips == 0 && + if (!peer->ksnp_n_passive_ips && list_empty(&peer->ksnp_routes)) { if (index-- > 0) continue; @@ -301,7 +304,7 @@ ksocknal_get_peer_info(lnet_ni_t *ni, int index, continue; route = list_entry(rtmp, ksock_route_t, - ksnr_list); + ksnr_list); *id = peer->ksnp_id; *myip = route->ksnr_myipaddr; @@ -330,7 +333,7 @@ ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn) ksocknal_route_addref(route); if (route->ksnr_myipaddr != conn->ksnc_myipaddr) { - if (route->ksnr_myipaddr == 0) { + if (!route->ksnr_myipaddr) { /* route wasn't bound locally yet (the initial route) */ CDEBUG(D_NET, "Binding %s %pI4h to %pI4h\n", libcfs_id2str(peer->ksnp_id), @@ -345,21 +348,23 @@ ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn) iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni, route->ksnr_myipaddr); - if (iface != NULL) + if (iface) iface->ksni_nroutes--; } route->ksnr_myipaddr = conn->ksnc_myipaddr; iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni, route->ksnr_myipaddr); - if (iface != NULL) + if (iface) iface->ksni_nroutes++; } - route->ksnr_connected |= (1<<type); + route->ksnr_connected |= (1 << type); route->ksnr_conn_count++; - /* Successful connection => further attempts can - * proceed immediately */ + /* + * Successful connection => further attempts can + * proceed immediately + */ route->ksnr_retry_interval = 0; } @@ -371,10 +376,10 @@ ksocknal_add_route_locked(ksock_peer_t *peer, ksock_route_t *route) ksock_route_t *route2; LASSERT(!peer->ksnp_closing); - LASSERT(route->ksnr_peer == NULL); + LASSERT(!route->ksnr_peer); LASSERT(!route->ksnr_scheduled); LASSERT(!route->ksnr_connecting); - LASSERT(route->ksnr_connected == 0); + LASSERT(!route->ksnr_connected); /* LASSERT(unique) */ list_for_each(tmp, &peer->ksnp_routes) { @@ -382,8 +387,8 @@ ksocknal_add_route_locked(ksock_peer_t *peer, ksock_route_t *route) if (route2->ksnr_ipaddr == route->ksnr_ipaddr) { CERROR("Duplicate route %s %pI4h\n", - libcfs_id2str(peer->ksnp_id), - &route->ksnr_ipaddr); + libcfs_id2str(peer->ksnp_id), + &route->ksnr_ipaddr); LBUG(); } } @@ -425,10 +430,10 @@ ksocknal_del_route_locked(ksock_route_t *route) ksocknal_close_conn_locked(conn, 0); } - if (route->ksnr_myipaddr != 0) { 
+ if (route->ksnr_myipaddr) { iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni, route->ksnr_myipaddr); - if (iface != NULL) + if (iface) iface->ksni_nroutes--; } @@ -438,8 +443,10 @@ ksocknal_del_route_locked(ksock_route_t *route) if (list_empty(&peer->ksnp_routes) && list_empty(&peer->ksnp_conns)) { - /* I've just removed the last route to a peer with no active - * connections */ + /* + * I've just removed the last route to a peer with no active + * connections + */ ksocknal_unlink_peer_locked(peer); } } @@ -460,11 +467,11 @@ ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ipaddr, int port) /* Have a brand new peer ready... */ rc = ksocknal_create_peer(&peer, ni, id); - if (rc != 0) + if (rc) return rc; route = ksocknal_create_route(ipaddr, port); - if (route == NULL) { + if (!route) { ksocknal_peer_decref(peer); return -ENOMEM; } @@ -472,16 +479,16 @@ ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ipaddr, int port) write_lock_bh(&ksocknal_data.ksnd_global_lock); /* always called with a ref on ni, so shutdown can't have started */ - LASSERT(((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0); + LASSERT(!((ksock_net_t *) ni->ni_data)->ksnn_shutdown); peer2 = ksocknal_find_peer_locked(ni, id); - if (peer2 != NULL) { + if (peer2) { ksocknal_peer_decref(peer); peer = peer2; } else { /* peer table takes my ref on peer */ list_add_tail(&peer->ksnp_list, - ksocknal_nid2peerlist(id.nid)); + ksocknal_nid2peerlist(id.nid)); } route2 = NULL; @@ -493,7 +500,7 @@ ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ipaddr, int port) route2 = NULL; } - if (route2 == NULL) { + if (!route2) { ksocknal_add_route_locked(peer, route); route->ksnr_share_count++; } else { @@ -524,7 +531,7 @@ ksocknal_del_peer_locked(ksock_peer_t *peer, __u32 ip) route = list_entry(tmp, ksock_route_t, ksnr_list); /* no match */ - if (!(ip == 0 || route->ksnr_ipaddr == ip)) + if (!(!ip || route->ksnr_ipaddr == ip)) continue; route->ksnr_share_count = 0; @@ -538,15 +545,16 @@ ksocknal_del_peer_locked(ksock_peer_t *peer, __u32 ip) nshared += route->ksnr_share_count; } - if (nshared == 0) { - /* remove everything else if there are no explicit entries - * left */ - + if (!nshared) { + /* + * remove everything else if there are no explicit entries + * left + */ list_for_each_safe(tmp, nxt, &peer->ksnp_routes) { route = list_entry(tmp, ksock_route_t, ksnr_list); /* we should only be removing auto-entries */ - LASSERT(route->ksnr_share_count == 0); + LASSERT(!route->ksnr_share_count); ksocknal_del_route_locked(route); } @@ -575,16 +583,16 @@ ksocknal_del_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ip) write_lock_bh(&ksocknal_data.ksnd_global_lock); - if (id.nid != LNET_NID_ANY) - lo = hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers); - else { + if (id.nid != LNET_NID_ANY) { + lo = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers); + hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers); + } else { lo = 0; hi = ksocknal_data.ksnd_peer_hash_size - 1; } for (i = lo; i <= hi; i++) { - list_for_each_safe(ptmp, pnxt, - &ksocknal_data.ksnd_peers[i]) { + list_for_each_safe(ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) { peer = list_entry(ptmp, ksock_peer_t, ksnp_list); if (peer->ksnp_ni != ni) @@ -604,7 +612,7 @@ ksocknal_del_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ip) LASSERT(list_empty(&peer->ksnp_routes)); list_splice_init(&peer->ksnp_tx_queue, - &zombies); + &zombies); } ksocknal_peer_decref(peer); /* ...till here */ @@ -645,7 +653,7 @@ 
ksocknal_get_conn_by_idx(lnet_ni_t *ni, int index) continue; conn = list_entry(ctmp, ksock_conn_t, - ksnc_list); + ksnc_list); ksocknal_conn_addref(conn); read_unlock(&ksocknal_data.ksnd_global_lock); return conn; @@ -692,8 +700,10 @@ ksocknal_local_ipvec(lnet_ni_t *ni, __u32 *ipaddrs) nip = net->ksnn_ninterfaces; LASSERT(nip <= LNET_MAX_INTERFACES); - /* Only offer interfaces for additional connections if I have - * more than one. */ + /* + * Only offer interfaces for additional connections if I have + * more than one. + */ if (nip < 2) { read_unlock(&ksocknal_data.ksnd_global_lock); return 0; @@ -701,7 +711,7 @@ ksocknal_local_ipvec(lnet_ni_t *ni, __u32 *ipaddrs) for (i = 0; i < nip; i++) { ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr; - LASSERT(ipaddrs[i] != 0); + LASSERT(ipaddrs[i]); } read_unlock(&ksocknal_data.ksnd_global_lock); @@ -719,11 +729,11 @@ ksocknal_match_peerip(ksock_interface_t *iface, __u32 *ips, int nips) int i; for (i = 0; i < nips; i++) { - if (ips[i] == 0) + if (!ips[i]) continue; this_xor = ips[i] ^ iface->ksni_ipaddr; - this_netmatch = ((this_xor & iface->ksni_netmask) == 0) ? 1 : 0; + this_netmatch = !(this_xor & iface->ksni_netmask) ? 1 : 0; if (!(best < 0 || best_netmatch < this_netmatch || @@ -757,38 +767,45 @@ ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips) int best_netmatch; int best_npeers; - /* CAVEAT EMPTOR: We do all our interface matching with an + /* + * CAVEAT EMPTOR: We do all our interface matching with an * exclusive hold of global lock at IRQ priority. We're only * expecting to be dealing with small numbers of interfaces, so the - * O(n**3)-ness shouldn't matter */ - - /* Also note that I'm not going to return more than n_peerips - * interfaces, even if I have more myself */ - + * O(n**3)-ness shouldn't matter + */ + /* + * Also note that I'm not going to return more than n_peerips + * interfaces, even if I have more myself + */ write_lock_bh(global_lock); LASSERT(n_peerips <= LNET_MAX_INTERFACES); LASSERT(net->ksnn_ninterfaces <= LNET_MAX_INTERFACES); - /* Only match interfaces for additional connections - * if I have > 1 interface */ + /* + * Only match interfaces for additional connections + * if I have > 1 interface + */ n_ips = (net->ksnn_ninterfaces < 2) ? 0 : min(n_peerips, net->ksnn_ninterfaces); for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) { /* ^ yes really... */ - /* If we have any new interfaces, first tick off all the + /* + * If we have any new interfaces, first tick off all the * peer IPs that match old interfaces, then choose new * interfaces to match the remaining peer IPS. * We don't forget interfaces we've stopped using; we might - * start using them again... */ - + * start using them again... + */ if (i < peer->ksnp_n_passive_ips) { /* Old interface. */ ip = peer->ksnp_passive_ips[i]; best_iface = ksocknal_ip2iface(peer->ksnp_ni, ip); + /* peer passive ips are kept up to date */ + LASSERT(best_iface); } else { /* choose a new interface */ LASSERT(i == peer->ksnp_n_passive_ips); @@ -810,9 +827,9 @@ ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips) k = ksocknal_match_peerip(iface, peerips, n_peerips); xor = ip ^ peerips[k]; - this_netmatch = ((xor & iface->ksni_netmask) == 0) ? 1 : 0; + this_netmatch = !(xor & iface->ksni_netmask) ? 
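			/* (illustration, not from this patch: the xor test —
			 * 192.168.1.10 vs 192.168.1.22 under 255.255.255.0
			 * gives xor = 0.0.0.28, so xor & netmask == 0 and the
			 * addresses match; against 192.168.2.22 the third
			 * octet survives the mask and the test fails.) */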
1 : 0; - if (!(best_iface == NULL || + if (!(!best_iface || best_netmatch < this_netmatch || (best_netmatch == this_netmatch && best_npeers > iface->ksni_npeers))) @@ -823,10 +840,12 @@ ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips) best_npeers = iface->ksni_npeers; } + LASSERT(best_iface); + best_iface->ksni_npeers++; ip = best_iface->ksni_ipaddr; peer->ksnp_passive_ips[i] = ip; - peer->ksnp_n_passive_ips = i+1; + peer->ksnp_n_passive_ips = i + 1; } /* mark the best matching peer IP used */ @@ -860,16 +879,19 @@ ksocknal_create_routes(ksock_peer_t *peer, int port, int i; int j; - /* CAVEAT EMPTOR: We do all our interface matching with an + /* + * CAVEAT EMPTOR: We do all our interface matching with an * exclusive hold of global lock at IRQ priority. We're only * expecting to be dealing with small numbers of interfaces, so the - * O(n**3)-ness here shouldn't matter */ - + * O(n**3)-ness here shouldn't matter + */ write_lock_bh(global_lock); if (net->ksnn_ninterfaces < 2) { - /* Only create additional connections - * if I have > 1 interface */ + /* + * Only create additional connections + * if I have > 1 interface + */ write_unlock_bh(global_lock); return; } @@ -877,13 +899,13 @@ ksocknal_create_routes(ksock_peer_t *peer, int port, LASSERT(npeer_ipaddrs <= LNET_MAX_INTERFACES); for (i = 0; i < npeer_ipaddrs; i++) { - if (newroute != NULL) { + if (newroute) { newroute->ksnr_ipaddr = peer_ipaddrs[i]; } else { write_unlock_bh(global_lock); newroute = ksocknal_create_route(peer_ipaddrs[i], port); - if (newroute == NULL) + if (!newroute) return; write_lock_bh(global_lock); @@ -904,7 +926,7 @@ ksocknal_create_routes(ksock_peer_t *peer, int port, route = NULL; } - if (route != NULL) + if (route) continue; best_iface = NULL; @@ -920,21 +942,21 @@ ksocknal_create_routes(ksock_peer_t *peer, int port, /* Using this interface already? */ list_for_each(rtmp, &peer->ksnp_routes) { route = list_entry(rtmp, ksock_route_t, - ksnr_list); + ksnr_list); if (route->ksnr_myipaddr == iface->ksni_ipaddr) break; route = NULL; } - if (route != NULL) + if (route) continue; - this_netmatch = (((iface->ksni_ipaddr ^ + this_netmatch = (!((iface->ksni_ipaddr ^ newroute->ksnr_ipaddr) & - iface->ksni_netmask) == 0) ? 1 : 0; + iface->ksni_netmask)) ? 
1 : 0; - if (!(best_iface == NULL || + if (!(!best_iface || best_netmatch < this_netmatch || (best_netmatch == this_netmatch && best_nroutes > iface->ksni_nroutes))) @@ -945,7 +967,7 @@ ksocknal_create_routes(ksock_peer_t *peer, int port, best_nroutes = iface->ksni_nroutes; } - if (best_iface == NULL) + if (!best_iface) continue; newroute->ksnr_myipaddr = best_iface->ksni_ipaddr; @@ -956,7 +978,7 @@ ksocknal_create_routes(ksock_peer_t *peer, int port, } write_unlock_bh(global_lock); - if (newroute != NULL) + if (newroute) ksocknal_route_decref(newroute); } @@ -969,10 +991,10 @@ ksocknal_accept(lnet_ni_t *ni, struct socket *sock) int peer_port; rc = lnet_sock_getaddr(sock, 1, &peer_ip, &peer_port); - LASSERT(rc == 0); /* we succeeded before */ + LASSERT(!rc); /* we succeeded before */ LIBCFS_ALLOC(cr, sizeof(*cr)); - if (cr == NULL) { + if (!cr) { LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from %pI4h: memory exhausted\n", &peer_ip); return -ENOMEM; @@ -997,7 +1019,6 @@ ksocknal_connecting(ksock_peer_t *peer, __u32 ipaddr) ksock_route_t *route; list_for_each_entry(route, &peer->ksnp_routes, ksnr_list) { - if (route->ksnr_ipaddr == ipaddr) return route->ksnr_connecting; } @@ -1006,7 +1027,7 @@ ksocknal_connecting(ksock_peer_t *peer, __u32 ipaddr) int ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route, - struct socket *sock, int type) + struct socket *sock, int type) { rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; LIST_HEAD(zombies); @@ -1026,12 +1047,12 @@ ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route, int active; char *warn = NULL; - active = (route != NULL); + active = !!route; LASSERT(active == (type != SOCKLND_CONN_NONE)); LIBCFS_ALLOC(conn, sizeof(*conn)); - if (conn == NULL) { + if (!conn) { rc = -ENOMEM; goto failed_0; } @@ -1039,8 +1060,10 @@ ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route, conn->ksnc_peer = NULL; conn->ksnc_route = NULL; conn->ksnc_sock = sock; - /* 2 ref, 1 for conn, another extra ref prevents socket - * being closed before establishment of connection */ + /* + * 2 ref, 1 for conn, another extra ref prevents socket + * being closed before establishment of connection + */ atomic_set(&conn->ksnc_sock_refcount, 2); conn->ksnc_type = type; ksocknal_lib_save_callback(sock, conn); @@ -1057,21 +1080,22 @@ ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route, LIBCFS_ALLOC(hello, offsetof(ksock_hello_msg_t, kshm_ips[LNET_MAX_INTERFACES])); - if (hello == NULL) { + if (!hello) { rc = -ENOMEM; goto failed_1; } /* stash conn's local and remote addrs */ rc = ksocknal_lib_get_conn_addrs(conn); - if (rc != 0) + if (rc) goto failed_1; - /* Find out/confirm peer's NID and connection type and get the + /* + * Find out/confirm peer's NID and connection type and get the * vector of interfaces she's willing to let me connect to. 
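	 * (Roughly, per the surrounding code: the active side does
	 * ksocknal_send_hello() and then reads the response, while a
	 * passive conn reads the peer's hello first and replies later,
	 * which is why only the passive side sits on the listener
	 * timeout.)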
* Passive connections use the listener timeout since the peer sends - * eagerly */ - + * eagerly + */ if (active) { peer = route->ksnr_peer; LASSERT(ni == peer->ksnp_ni); @@ -1084,7 +1108,7 @@ ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route, conn->ksnc_proto = peer->ksnp_proto; write_unlock_bh(global_lock); - if (conn->ksnc_proto == NULL) { + if (!conn->ksnc_proto) { conn->ksnc_proto = &ksocknal_protocol_v3x; #if SOCKNAL_VERSION_DEBUG if (*ksocknal_tunables.ksnd_protocol == 2) @@ -1095,7 +1119,7 @@ ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route, } rc = ksocknal_send_hello(ni, conn, peerid.nid, hello); - if (rc != 0) + if (rc) goto failed_1; } else { peerid.nid = LNET_NID_ANY; @@ -1109,8 +1133,8 @@ ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route, if (rc < 0) goto failed_1; - LASSERT(rc == 0 || active); - LASSERT(conn->ksnc_proto != NULL); + LASSERT(!rc || active); + LASSERT(conn->ksnc_proto); LASSERT(peerid.nid != LNET_NID_ANY); cpt = lnet_cpt_of_nid(peerid.nid); @@ -1120,20 +1144,22 @@ ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route, write_lock_bh(global_lock); } else { rc = ksocknal_create_peer(&peer, ni, peerid); - if (rc != 0) + if (rc) goto failed_1; write_lock_bh(global_lock); /* called with a ref on ni, so shutdown can't have started */ - LASSERT(((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0); + LASSERT(!((ksock_net_t *) ni->ni_data)->ksnn_shutdown); peer2 = ksocknal_find_peer_locked(ni, peerid); - if (peer2 == NULL) { - /* NB this puts an "empty" peer in the peer - * table (which takes my ref) */ + if (!peer2) { + /* + * NB this puts an "empty" peer in the peer + * table (which takes my ref) + */ list_add_tail(&peer->ksnp_list, - ksocknal_nid2peerlist(peerid.nid)); + ksocknal_nid2peerlist(peerid.nid)); } else { ksocknal_peer_decref(peer); peer = peer2; @@ -1143,8 +1169,10 @@ ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route, ksocknal_peer_addref(peer); peer->ksnp_accepting++; - /* Am I already connecting to this guy? Resolve in - * favour of higher NID... */ + /* + * Am I already connecting to this guy? Resolve in + * favour of higher NID... + */ if (peerid.nid < ni->ni_nid && ksocknal_connecting(peer, conn->ksnc_ipaddr)) { rc = EALREADY; @@ -1161,8 +1189,9 @@ ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route, goto failed_2; } - if (peer->ksnp_proto == NULL) { - /* Never connected before. + if (!peer->ksnp_proto) { + /* + * Never connected before. * NB recv_hello may have returned EPROTO to signal my peer * wants a different protocol than the one I asked for. */ @@ -1198,8 +1227,10 @@ ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route, goto failed_2; } - /* Refuse to duplicate an existing connection, unless this is a - * loopback connection */ + /* + * Refuse to duplicate an existing connection, unless this is a + * loopback connection + */ if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) { list_for_each(tmp, &peer->ksnp_conns) { conn2 = list_entry(tmp, ksock_conn_t, ksnc_list); @@ -1209,9 +1240,11 @@ ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route, conn2->ksnc_type != conn->ksnc_type) continue; - /* Reply on a passive connection attempt so the peer - * realises we're connected. */ - LASSERT(rc == 0); + /* + * Reply on a passive connection attempt so the peer + * realises we're connected. 
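		 * (rc is deliberately the positive EALREADY, not -EALREADY:
		 * positive rc on the passive path is handled below by
		 * replying with a SOCKLND_CONN_NONE hello to request a
		 * retry instead of failing the connection outright.)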
+ */ + LASSERT(!rc); if (!active) rc = EALREADY; @@ -1220,9 +1253,11 @@ ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route, } } - /* If the connection created by this route didn't bind to the IP + /* + * If the connection created by this route didn't bind to the IP * address the route connected to, the connection/route matching - * code below probably isn't going to work. */ + * code below probably isn't going to work. + */ if (active && route->ksnr_ipaddr != conn->ksnc_ipaddr) { CERROR("Route %s %pI4h connected to %pI4h\n", @@ -1231,10 +1266,12 @@ ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route, &conn->ksnc_ipaddr); } - /* Search for a route corresponding to the new connection and + /* + * Search for a route corresponding to the new connection and * create an association. This allows incoming connections created * by routes in my peer to match my own route entries so I don't - * continually create duplicate routes. */ + * continually create duplicate routes. + */ list_for_each(tmp, &peer->ksnp_routes) { route = list_entry(tmp, ksock_route_t, ksnr_list); @@ -1278,14 +1315,14 @@ ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route, write_unlock_bh(global_lock); - /* We've now got a new connection. Any errors from here on are just + /* + * We've now got a new connection. Any errors from here on are just * like "normal" comms errors and we close the connection normally. * NB (a) we still have to send the reply HELLO for passive * connections, * (b) normal I/O on the conn is blocked until I setup and call the * socket callbacks. */ - CDEBUG(D_NET, "New conn %s p %d.x %pI4h -> %pI4h/%d incarnation:%lld sched[%d:%d]\n", libcfs_id2str(peerid), conn->ksnc_proto->pro_version, &conn->ksnc_myipaddr, &conn->ksnc_ipaddr, @@ -1305,12 +1342,14 @@ ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route, LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t, kshm_ips[LNET_MAX_INTERFACES])); - /* setup the socket AFTER I've received hello (it disables + /* + * setup the socket AFTER I've received hello (it disables * SO_LINGER). I might call back to the acceptor who may want * to send a protocol version response and then close the * socket; this ensures the socket only tears down after the - * response has been sent. */ - if (rc == 0) + * response has been sent. + */ + if (!rc) rc = ksocknal_lib_setup_sock(sock); write_lock_bh(global_lock); @@ -1323,14 +1362,14 @@ ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route, write_unlock_bh(global_lock); - if (rc != 0) { + if (rc) { write_lock_bh(global_lock); if (!conn->ksnc_closing) { /* could be closed by another thread */ ksocknal_close_conn_locked(conn, rc); } write_unlock_bh(global_lock); - } else if (ksocknal_connsock_addref(conn) == 0) { + } else if (!ksocknal_connsock_addref(conn)) { /* Allow I/O to proceed. 
 */
 		ksocknal_read_callback(conn);
 		ksocknal_write_callback(conn);
@@ -1352,19 +1391,21 @@ ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route,

 	write_unlock_bh(global_lock);

-	if (warn != NULL) {
+	if (warn) {
 		if (rc < 0)
 			CERROR("Not creating conn %s type %d: %s\n",
 			       libcfs_id2str(peerid), conn->ksnc_type, warn);
 		else
 			CDEBUG(D_NET, "Not creating conn %s type %d: %s\n",
-			      libcfs_id2str(peerid), conn->ksnc_type, warn);
+			       libcfs_id2str(peerid), conn->ksnc_type, warn);
 	}

 	if (!active) {
 		if (rc > 0) {
-			/* Request retry by replying with CONN_NONE
-			 * ksnc_proto has been set already */
+			/*
+			 * Request retry by replying with CONN_NONE
+			 * ksnc_proto has been set already
+			 */
 			conn->ksnc_type = SOCKLND_CONN_NONE;
 			hello->kshm_nips = 0;
 			ksocknal_send_hello(ni, conn, peerid.nid, hello);
@@ -1379,7 +1420,7 @@ ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route,
 	ksocknal_peer_decref(peer);

 failed_1:
-	if (hello != NULL)
+	if (hello)
 		LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t,
 					    kshm_ips[LNET_MAX_INTERFACES]));

@@ -1393,15 +1434,17 @@ failed_0:
 void
 ksocknal_close_conn_locked(ksock_conn_t *conn, int error)
 {
-	/* This just does the immmediate housekeeping, and queues the
+	/*
+	 * This just does the immediate housekeeping, and queues the
 	 * connection for the reaper to terminate.
-	 * Caller holds ksnd_global_lock exclusively in irq context */
+	 * Caller holds ksnd_global_lock exclusively in irq context
+	 */
 	ksock_peer_t *peer = conn->ksnc_peer;
 	ksock_route_t *route;
 	ksock_conn_t *conn2;
 	struct list_head *tmp;

-	LASSERT(peer->ksnp_error == 0);
+	LASSERT(!peer->ksnp_error);
 	LASSERT(!conn->ksnc_closing);
 	conn->ksnc_closing = 1;

@@ -1409,10 +1452,10 @@ ksocknal_close_conn_locked(ksock_conn_t *conn, int error)
 	list_del(&conn->ksnc_list);

 	route = conn->ksnc_route;
-	if (route != NULL) {
+	if (route) {
 		/* dissociate conn from route...
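		 * (ksnr_connected is a bitmask with one bit per conn type —
		 * under the v3 protocol SOCKLND_CONN_CONTROL,
		 * SOCKLND_CONN_BULK_IN and SOCKLND_CONN_BULK_OUT — and the
		 * bit for conn's type is only cleared if the conn2 scan
		 * below finds no surviving conn of that type on this route.)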
 */
 		LASSERT(!route->ksnr_deleted);
-		LASSERT((route->ksnr_connected & (1 << conn->ksnc_type)) != 0);
+		LASSERT(route->ksnr_connected & (1 << conn->ksnc_type));

 		conn2 = NULL;
 		list_for_each(tmp, &peer->ksnp_conns) {
@@ -1424,7 +1467,7 @@ ksocknal_close_conn_locked(ksock_conn_t *conn, int error)
 			conn2 = NULL;
 		}
-		if (conn2 == NULL)
+		if (!conn2)
 			route->ksnr_connected &= ~(1 << conn->ksnc_type);

 		conn->ksnc_route = NULL;

@@ -1445,15 +1488,17 @@ ksocknal_close_conn_locked(ksock_conn_t *conn, int error)

 			LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x);

-			/* throw them to the last connection...,
-			 * these TXs will be send to /dev/null by scheduler */
+			/*
+			 * throw them to the last connection...,
+			 * these TXs will be sent to /dev/null by scheduler
+			 */
 			list_for_each_entry(tx, &peer->ksnp_tx_queue,
-						tx_list)
+					    tx_list)
 				ksocknal_tx_prep(conn, tx);

 			spin_lock_bh(&conn->ksnc_scheduler->kss_lock);
 			list_splice_init(&peer->ksnp_tx_queue,
-					 &conn->ksnc_tx_queue);
+					 &conn->ksnc_tx_queue);
 			spin_unlock_bh(&conn->ksnc_scheduler->kss_lock);
 		}

@@ -1461,8 +1506,10 @@ ksocknal_close_conn_locked(ksock_conn_t *conn, int error)
 		peer->ksnp_error = error; /* stash last conn close reason */

 		if (list_empty(&peer->ksnp_routes)) {
-			/* I've just closed last conn belonging to a
-			 * peer with no routes to it */
+			/*
+			 * I've just closed last conn belonging to a
+			 * peer with no routes to it
+			 */
 			ksocknal_unlink_peer_locked(peer);
 		}
 	}

@@ -1470,7 +1517,7 @@ ksocknal_close_conn_locked(ksock_conn_t *conn, int error)
 	spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);

 	list_add_tail(&conn->ksnc_list,
-		      &ksocknal_data.ksnd_deathrow_conns);
+		      &ksocknal_data.ksnd_deathrow_conns);
 	wake_up(&ksocknal_data.ksnd_reaper_waitq);

 	spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);

@@ -1482,16 +1529,17 @@ ksocknal_peer_failed(ksock_peer_t *peer)
 	int notify = 0;
 	unsigned long last_alive = 0;

-	/* There has been a connection failure or comms error; but I'll only
+	/*
+	 * There has been a connection failure or comms error; but I'll only
 	 * tell LNET I think the peer is dead if it's to another kernel and
-	 * there are no connections or connection attempts in existence. */
-
+	 * there are no connections or connection attempts in existence.
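	 *
	 * (The "another kernel" test is the LNET_PID_USERFLAG check below:
	 * userspace peers are never reported dead. When we do notify,
	 * lnet_notify(ni, nid, 0, last_alive) passes the time the peer was
	 * last heard from so the LNet router level can age its state.)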
+ */ read_lock(&ksocknal_data.ksnd_global_lock); - if ((peer->ksnp_id.pid & LNET_PID_USERFLAG) == 0 && + if (!(peer->ksnp_id.pid & LNET_PID_USERFLAG) && list_empty(&peer->ksnp_conns) && - peer->ksnp_accepting == 0 && - ksocknal_find_connecting_route_locked(peer) == NULL) { + !peer->ksnp_accepting && + !ksocknal_find_connecting_route_locked(peer)) { notify = 1; last_alive = peer->ksnp_last_alive; } @@ -1500,7 +1548,7 @@ ksocknal_peer_failed(ksock_peer_t *peer) if (notify) lnet_notify(peer->ksnp_ni, peer->ksnp_id.nid, 0, - last_alive); + last_alive); } void @@ -1508,12 +1556,15 @@ ksocknal_finalize_zcreq(ksock_conn_t *conn) { ksock_peer_t *peer = conn->ksnc_peer; ksock_tx_t *tx; + ksock_tx_t *temp; ksock_tx_t *tmp; LIST_HEAD(zlist); - /* NB safe to finalize TXs because closing of socket will - * abort all buffered data */ - LASSERT(conn->ksnc_sock == NULL); + /* + * NB safe to finalize TXs because closing of socket will + * abort all buffered data + */ + LASSERT(!conn->ksnc_sock); spin_lock(&peer->ksnp_lock); @@ -1521,7 +1572,7 @@ ksocknal_finalize_zcreq(ksock_conn_t *conn) if (tx->tx_conn != conn) continue; - LASSERT(tx->tx_msg.ksm_zc_cookies[0] != 0); + LASSERT(tx->tx_msg.ksm_zc_cookies[0]); tx->tx_msg.ksm_zc_cookies[0] = 0; tx->tx_zc_aborted = 1; /* mark it as not-acked */ @@ -1531,9 +1582,7 @@ ksocknal_finalize_zcreq(ksock_conn_t *conn) spin_unlock(&peer->ksnp_lock); - while (!list_empty(&zlist)) { - tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list); - + list_for_each_entry_safe(tx, temp, &zlist, tx_zc_list) { list_del(&tx->tx_zc_list); ksocknal_tx_decref(tx); } @@ -1542,10 +1591,12 @@ ksocknal_finalize_zcreq(ksock_conn_t *conn) void ksocknal_terminate_conn(ksock_conn_t *conn) { - /* This gets called by the reaper (guaranteed thread context) to + /* + * This gets called by the reaper (guaranteed thread context) to * disengage the socket from its callbacks and close it. * ksnc_refcount will eventually hit zero, and then the reaper will - * destroy it. */ + * destroy it. + */ ksock_peer_t *peer = conn->ksnc_peer; ksock_sched_t *sched = conn->ksnc_scheduler; int failed = 0; @@ -1561,7 +1612,7 @@ ksocknal_terminate_conn(ksock_conn_t *conn) if (!conn->ksnc_tx_scheduled && !list_empty(&conn->ksnc_tx_queue)) { list_add_tail(&conn->ksnc_tx_list, - &sched->kss_tx_conns); + &sched->kss_tx_conns); conn->ksnc_tx_scheduled = 1; /* extra ref for scheduler */ ksocknal_conn_addref(conn); @@ -1576,11 +1627,13 @@ ksocknal_terminate_conn(ksock_conn_t *conn) ksocknal_lib_reset_callback(conn->ksnc_sock, conn); - /* OK, so this conn may not be completely disengaged from its - * scheduler yet, but it _has_ committed to terminate... */ + /* + * OK, so this conn may not be completely disengaged from its + * scheduler yet, but it _has_ committed to terminate... + */ conn->ksnc_scheduler->kss_nconns--; - if (peer->ksnp_error != 0) { + if (peer->ksnp_error) { /* peer's last conn closed in error */ LASSERT(list_empty(&peer->ksnp_conns)); failed = 1; @@ -1592,11 +1645,13 @@ ksocknal_terminate_conn(ksock_conn_t *conn) if (failed) ksocknal_peer_failed(peer); - /* The socket is closed on the final put; either here, or in + /* + * The socket is closed on the final put; either here, or in * ksocknal_{send,recv}msg(). Since we set up the linger2 option * when the connection was established, this will close the socket * immediately, aborting anything buffered in it. Any hung - * zero-copy transmits will therefore complete in finite time. */ + * zero-copy transmits will therefore complete in finite time. 
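ksocknal_finalize_zcreq() above (and ksocknal_free_buffers() further down) now drain their zombie lists with list_for_each_entry_safe(), whose second cursor keeps the walk valid while each entry is unlinked and freed. A self-contained userspace re-implementation of the idiom, sketched without <linux/list.h> (builds with gcc, which supplies typeof):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct list_head { struct list_head *next, *prev; };

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))
#define list_entry(ptr, type, member) container_of(ptr, type, member)
#define list_for_each_entry_safe(pos, n, head, member)                    \
        for (pos = list_entry((head)->next, typeof(*pos), member),        \
             n = list_entry(pos->member.next, typeof(*pos), member);      \
             &pos->member != (head);                                      \
             pos = n, n = list_entry(n->member.next, typeof(*n), member))

static void list_add_tail(struct list_head *entry, struct list_head *head)
{
        entry->prev = head->prev;
        entry->next = head;
        head->prev->next = entry;
        head->prev = entry;
}

static void list_del(struct list_head *entry)
{
        entry->prev->next = entry->next;
        entry->next->prev = entry->prev;
}

struct tx {
        int cookie;
        struct list_head tx_list;
};

int main(void)
{
        struct list_head zlist = { &zlist, &zlist };
        struct tx *tx, *tmp;
        int i;

        for (i = 0; i < 3; i++) {
                tx = malloc(sizeof(*tx));
                if (!tx)
                        return 1;
                tx->cookie = i;
                list_add_tail(&tx->tx_list, &zlist);
        }
        /* safe variant: "tmp" is fetched before "tx" is freed */
        list_for_each_entry_safe(tx, tmp, &zlist, tx_list) {
                list_del(&tx->tx_list);
                printf("freeing tx %d\n", tx->cookie);
                free(tx);
        }
        return 0;
}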
+ */ ksocknal_connsock_decref(conn); } @@ -1605,7 +1660,7 @@ ksocknal_queue_zombie_conn(ksock_conn_t *conn) { /* Queue the conn for the reaper to destroy */ - LASSERT(atomic_read(&conn->ksnc_conn_refcount) == 0); + LASSERT(!atomic_read(&conn->ksnc_conn_refcount)); spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns); @@ -1622,10 +1677,10 @@ ksocknal_destroy_conn(ksock_conn_t *conn) /* Final coup-de-grace of the reaper */ CDEBUG(D_NET, "connection %p\n", conn); - LASSERT(atomic_read(&conn->ksnc_conn_refcount) == 0); - LASSERT(atomic_read(&conn->ksnc_sock_refcount) == 0); - LASSERT(conn->ksnc_sock == NULL); - LASSERT(conn->ksnc_route == NULL); + LASSERT(!atomic_read(&conn->ksnc_conn_refcount)); + LASSERT(!atomic_read(&conn->ksnc_sock_refcount)); + LASSERT(!conn->ksnc_sock); + LASSERT(!conn->ksnc_route); LASSERT(!conn->ksnc_tx_scheduled); LASSERT(!conn->ksnc_rx_scheduled); LASSERT(list_empty(&conn->ksnc_tx_queue)); @@ -1642,7 +1697,7 @@ ksocknal_destroy_conn(ksock_conn_t *conn) cfs_duration_sec(cfs_time_sub(cfs_time_current(), last_rcv))); lnet_finalize(conn->ksnc_peer->ksnp_ni, - conn->ksnc_cookie, -EIO); + conn->ksnc_cookie, -EIO); break; case SOCKNAL_RX_LNET_HEADER: if (conn->ksnc_rx_started) @@ -1685,8 +1740,7 @@ ksocknal_close_peer_conns_locked(ksock_peer_t *peer, __u32 ipaddr, int why) list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) { conn = list_entry(ctmp, ksock_conn_t, ksnc_list); - if (ipaddr == 0 || - conn->ksnc_ipaddr == ipaddr) { + if (!ipaddr || conn->ksnc_ipaddr == ipaddr) { count++; ksocknal_close_conn_locked(conn, why); } @@ -1724,17 +1778,17 @@ ksocknal_close_matching_conns(lnet_process_id_t id, __u32 ipaddr) write_lock_bh(&ksocknal_data.ksnd_global_lock); - if (id.nid != LNET_NID_ANY) - lo = hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers); - else { + if (id.nid != LNET_NID_ANY) { + lo = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers); + hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers); + } else { lo = 0; hi = ksocknal_data.ksnd_peer_hash_size - 1; } for (i = lo; i <= hi; i++) { list_for_each_safe(ptmp, pnxt, - &ksocknal_data.ksnd_peers[i]) { - + &ksocknal_data.ksnd_peers[i]) { peer = list_entry(ptmp, ksock_peer_t, ksnp_list); if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) && @@ -1748,10 +1802,10 @@ ksocknal_close_matching_conns(lnet_process_id_t id, __u32 ipaddr) write_unlock_bh(&ksocknal_data.ksnd_global_lock); /* wildcards always succeed */ - if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || ipaddr == 0) + if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || !ipaddr) return 0; - if (count == 0) + if (!count) return -ENOENT; else return 0; @@ -1760,15 +1814,17 @@ ksocknal_close_matching_conns(lnet_process_id_t id, __u32 ipaddr) void ksocknal_notify(lnet_ni_t *ni, lnet_nid_t gw_nid, int alive) { - /* The router is telling me she's been notified of a change in - * gateway state.... */ + /* + * The router is telling me she's been notified of a change in + * gateway state.... + */ lnet_process_id_t id = {0}; id.nid = gw_nid; id.pid = LNET_PID_ANY; CDEBUG(D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid), - alive ? "up" : "down"); + alive ? "up" : "down"); if (!alive) { /* If the gateway crashed, close all open connections... */ @@ -1776,8 +1832,10 @@ ksocknal_notify(lnet_ni_t *ni, lnet_nid_t gw_nid, int alive) return; } - /* ...otherwise do nothing. We can only establish new connections - * if we have autroutes, and these connect on demand. 
*/ + /* + * ...otherwise do nothing. We can only establish new connections + * if we have autroutes, and these connect on demand. + */ } void @@ -1788,12 +1846,15 @@ ksocknal_query(lnet_ni_t *ni, lnet_nid_t nid, unsigned long *when) unsigned long now = cfs_time_current(); ksock_peer_t *peer = NULL; rwlock_t *glock = &ksocknal_data.ksnd_global_lock; - lnet_process_id_t id = {.nid = nid, .pid = LUSTRE_SRV_LNET_PID}; + lnet_process_id_t id = { + .nid = nid, + .pid = LNET_PID_LUSTRE, + }; read_lock(glock); peer = ksocknal_find_peer_locked(ni, id); - if (peer != NULL) { + if (peer) { struct list_head *tmp; ksock_conn_t *conn; int bufnob; @@ -1812,13 +1873,13 @@ ksocknal_query(lnet_ni_t *ni, lnet_nid_t nid, unsigned long *when) } last_alive = peer->ksnp_last_alive; - if (ksocknal_find_connectable_route_locked(peer) == NULL) + if (!ksocknal_find_connectable_route_locked(peer)) connect = 0; } read_unlock(glock); - if (last_alive != 0) + if (last_alive) *when = last_alive; CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago, connect %d\n", @@ -1834,7 +1895,7 @@ ksocknal_query(lnet_ni_t *ni, lnet_nid_t nid, unsigned long *when) write_lock_bh(glock); peer = ksocknal_find_peer_locked(ni, id); - if (peer != NULL) + if (peer) ksocknal_launch_all_connections_locked(peer); write_unlock_bh(glock); @@ -1857,7 +1918,7 @@ ksocknal_push_peer(ksock_peer_t *peer) list_for_each(tmp, &peer->ksnp_conns) { if (i++ == index) { conn = list_entry(tmp, ksock_conn_t, - ksnc_list); + ksnc_list); ksocknal_conn_addref(conn); break; } @@ -1865,7 +1926,7 @@ ksocknal_push_peer(ksock_peer_t *peer) read_unlock(&ksocknal_data.ksnd_global_lock); - if (conn == NULL) + if (!conn) break; ksocknal_lib_push_conn(conn); @@ -1885,7 +1946,8 @@ static int ksocknal_push(lnet_ni_t *ni, lnet_process_id_t id) start = &ksocknal_data.ksnd_peers[0]; end = &ksocknal_data.ksnd_peers[hsize - 1]; } else { - start = end = ksocknal_nid2peerlist(id.nid); + start = ksocknal_nid2peerlist(id.nid); + end = ksocknal_nid2peerlist(id.nid); } for (tmp = start; tmp <= end; tmp++) { @@ -1910,7 +1972,7 @@ static int ksocknal_push(lnet_ni_t *ni, lnet_process_id_t id) } read_unlock(&ksocknal_data.ksnd_global_lock); - if (i == 0) /* no match */ + if (!i) /* no match */ break; rc = 0; @@ -1934,14 +1996,13 @@ ksocknal_add_interface(lnet_ni_t *ni, __u32 ipaddress, __u32 netmask) struct list_head *rtmp; ksock_route_t *route; - if (ipaddress == 0 || - netmask == 0) + if (!ipaddress || !netmask) return -EINVAL; write_lock_bh(&ksocknal_data.ksnd_global_lock); iface = ksocknal_ip2iface(ni, ipaddress); - if (iface != NULL) { + if (iface) { /* silently ignore dups */ rc = 0; } else if (net->ksnn_ninterfaces == LNET_MAX_INTERFACES) { @@ -1957,16 +2018,15 @@ ksocknal_add_interface(lnet_ni_t *ni, __u32 ipaddress, __u32 netmask) for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) { peer = list_entry(ptmp, ksock_peer_t, - ksnp_list); + ksnp_list); for (j = 0; j < peer->ksnp_n_passive_ips; j++) if (peer->ksnp_passive_ips[j] == ipaddress) iface->ksni_npeers++; list_for_each(rtmp, &peer->ksnp_routes) { - route = list_entry(rtmp, - ksock_route_t, - ksnr_list); + route = list_entry(rtmp, ksock_route_t, + ksnr_list); if (route->ksnr_myipaddr == ipaddress) iface->ksni_nroutes++; @@ -1995,8 +2055,8 @@ ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr) for (i = 0; i < peer->ksnp_n_passive_ips; i++) if (peer->ksnp_passive_ips[i] == ipaddr) { - for (j = i+1; j < peer->ksnp_n_passive_ips; j++) - peer->ksnp_passive_ips[j-1] = + 
for (j = i + 1; j < peer->ksnp_n_passive_ips; j++) + peer->ksnp_passive_ips[j - 1] = peer->ksnp_passive_ips[j]; peer->ksnp_n_passive_ips--; break; @@ -2008,7 +2068,7 @@ ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr) if (route->ksnr_myipaddr != ipaddr) continue; - if (route->ksnr_share_count != 0) { + if (route->ksnr_share_count) { /* Manually created; keep, but unbind */ route->ksnr_myipaddr = 0; } else { @@ -2041,23 +2101,21 @@ ksocknal_del_interface(lnet_ni_t *ni, __u32 ipaddress) for (i = 0; i < net->ksnn_ninterfaces; i++) { this_ip = net->ksnn_interfaces[i].ksni_ipaddr; - if (!(ipaddress == 0 || - ipaddress == this_ip)) + if (!(!ipaddress || ipaddress == this_ip)) continue; rc = 0; - for (j = i+1; j < net->ksnn_ninterfaces; j++) - net->ksnn_interfaces[j-1] = + for (j = i + 1; j < net->ksnn_ninterfaces; j++) + net->ksnn_interfaces[j - 1] = net->ksnn_interfaces[j]; net->ksnn_ninterfaces--; for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) { list_for_each_safe(tmp, nxt, - &ksocknal_data.ksnd_peers[j]) { - peer = list_entry(tmp, ksock_peer_t, - ksnp_list); + &ksocknal_data.ksnd_peers[j]) { + peer = list_entry(tmp, ksock_peer_t, ksnp_list); if (peer->ksnp_ni != ni) continue; @@ -2121,7 +2179,7 @@ ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) rc = ksocknal_get_peer_info(ni, data->ioc_count, &id, &myip, &ip, &port, &conn_count, &share_count); - if (rc != 0) + if (rc) return rc; data->ioc_nid = id.nid; @@ -2136,7 +2194,7 @@ ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) case IOC_LIBCFS_ADD_PEER: id.nid = data->ioc_nid; - id.pid = LUSTRE_SRV_LNET_PID; + id.pid = LNET_PID_LUSTRE; return ksocknal_add_peer(ni, id, data->ioc_u32[0], /* IP */ data->ioc_u32[1]); /* port */ @@ -2153,7 +2211,7 @@ ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) int nagle; ksock_conn_t *conn = ksocknal_get_conn_by_idx(ni, data->ioc_count); - if (conn == NULL) + if (!conn) return -ENOENT; ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle); @@ -2202,14 +2260,14 @@ ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) static void ksocknal_free_buffers(void) { - LASSERT(atomic_read(&ksocknal_data.ksnd_nactive_txs) == 0); + LASSERT(!atomic_read(&ksocknal_data.ksnd_nactive_txs)); - if (ksocknal_data.ksnd_sched_info != NULL) { + if (ksocknal_data.ksnd_sched_info) { struct ksock_sched_info *info; int i; cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) { - if (info->ksi_scheds != NULL) { + if (info->ksi_scheds) { LIBCFS_FREE(info->ksi_scheds, info->ksi_nthreads_max * sizeof(info->ksi_scheds[0])); @@ -2219,21 +2277,21 @@ ksocknal_free_buffers(void) } LIBCFS_FREE(ksocknal_data.ksnd_peers, - sizeof(struct list_head) * - ksocknal_data.ksnd_peer_hash_size); + sizeof(struct list_head) * + ksocknal_data.ksnd_peer_hash_size); spin_lock(&ksocknal_data.ksnd_tx_lock); if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) { struct list_head zlist; ksock_tx_t *tx; + ksock_tx_t *temp; list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs); list_del_init(&ksocknal_data.ksnd_idle_noop_txs); spin_unlock(&ksocknal_data.ksnd_tx_lock); - while (!list_empty(&zlist)) { - tx = list_entry(zlist.next, ksock_tx_t, tx_list); + list_for_each_entry_safe(tx, temp, &zlist, tx_list) { list_del(&tx->tx_list); LIBCFS_FREE(tx, tx->tx_desc_size); } @@ -2250,7 +2308,7 @@ ksocknal_base_shutdown(void) int i; int j; - LASSERT(ksocknal_data.ksnd_nnets == 0); + LASSERT(!ksocknal_data.ksnd_nnets); switch (ksocknal_data.ksnd_init) { default: @@ -2258,7 +2316,7 @@ ksocknal_base_shutdown(void) case 
SOCKNAL_INIT_ALL: case SOCKNAL_INIT_DATA: - LASSERT(ksocknal_data.ksnd_peers != NULL); + LASSERT(ksocknal_data.ksnd_peers); for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) LASSERT(list_empty(&ksocknal_data.ksnd_peers[i])); @@ -2268,14 +2326,13 @@ ksocknal_base_shutdown(void) LASSERT(list_empty(&ksocknal_data.ksnd_connd_connreqs)); LASSERT(list_empty(&ksocknal_data.ksnd_connd_routes)); - if (ksocknal_data.ksnd_sched_info != NULL) { + if (ksocknal_data.ksnd_sched_info) { cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) { - if (info->ksi_scheds == NULL) + if (!info->ksi_scheds) continue; for (j = 0; j < info->ksi_nthreads_max; j++) { - sched = &info->ksi_scheds[j]; LASSERT(list_empty( &sched->kss_tx_conns)); @@ -2283,7 +2340,7 @@ ksocknal_base_shutdown(void) &sched->kss_rx_conns)); LASSERT(list_empty( &sched->kss_zombie_noop_txs)); - LASSERT(sched->kss_nconns == 0); + LASSERT(!sched->kss_nconns); } } } @@ -2293,10 +2350,10 @@ ksocknal_base_shutdown(void) wake_up_all(&ksocknal_data.ksnd_connd_waitq); wake_up_all(&ksocknal_data.ksnd_reaper_waitq); - if (ksocknal_data.ksnd_sched_info != NULL) { + if (ksocknal_data.ksnd_sched_info) { cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) { - if (info->ksi_scheds == NULL) + if (!info->ksi_scheds) continue; for (j = 0; j < info->ksi_nthreads_max; j++) { @@ -2308,7 +2365,7 @@ ksocknal_base_shutdown(void) i = 4; read_lock(&ksocknal_data.ksnd_global_lock); - while (ksocknal_data.ksnd_nthreads != 0) { + while (ksocknal_data.ksnd_nthreads) { i++; CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ "waiting for %d threads to terminate\n", @@ -2332,7 +2389,6 @@ ksocknal_base_shutdown(void) static __u64 ksocknal_new_incarnation(void) { - /* The incarnation number is the time this module loaded and it * identifies this particular instance of the socknal. 
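Per the comment above, the incarnation stamp only has to change across module loads so peers can tell a restarted socknal from the instance they were talking to. A userspace analogue, assuming (as the comment implies) that a load-time timestamp is sufficient; the microsecond units here are an assumption, not lifted from the kernel side:

#include <stdint.h>
#include <stdio.h>
#include <sys/time.h>

static uint64_t new_incarnation(void)
{
        struct timeval tv;

        gettimeofday(&tv, NULL);        /* once, at "module load" */
        return (uint64_t)tv.tv_sec * 1000000ULL + (uint64_t)tv.tv_usec;
}

int main(void)
{
        printf("incarnation %llu\n", (unsigned long long)new_incarnation());
        return 0;
}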
*/ @@ -2347,15 +2403,15 @@ ksocknal_base_startup(void) int i; LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); - LASSERT(ksocknal_data.ksnd_nnets == 0); + LASSERT(!ksocknal_data.ksnd_nnets); memset(&ksocknal_data, 0, sizeof(ksocknal_data)); /* zero pointers */ ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE; LIBCFS_ALLOC(ksocknal_data.ksnd_peers, - sizeof(struct list_head) * - ksocknal_data.ksnd_peer_hash_size); - if (ksocknal_data.ksnd_peers == NULL) + sizeof(struct list_head) * + ksocknal_data.ksnd_peer_hash_size); + if (!ksocknal_data.ksnd_peers) return -ENOMEM; for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) @@ -2386,7 +2442,7 @@ ksocknal_base_startup(void) ksocknal_data.ksnd_sched_info = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*info)); - if (ksocknal_data.ksnd_sched_info == NULL) + if (!ksocknal_data.ksnd_sched_info) goto failed; cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) { @@ -2397,8 +2453,10 @@ ksocknal_base_startup(void) if (*ksocknal_tunables.ksnd_nscheds > 0) { nthrs = min(nthrs, *ksocknal_tunables.ksnd_nscheds); } else { - /* max to half of CPUs, assume another half should be - * reserved for upper layer modules */ + /* + * max to half of CPUs, assume another half should be + * reserved for upper layer modules + */ nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs); } @@ -2407,7 +2465,7 @@ ksocknal_base_startup(void) LIBCFS_CPT_ALLOC(info->ksi_scheds, lnet_cpt_table(), i, info->ksi_nthreads_max * sizeof(*sched)); - if (info->ksi_scheds == NULL) + if (!info->ksi_scheds) goto failed; for (; nthrs > 0; nthrs--) { @@ -2425,8 +2483,10 @@ ksocknal_base_startup(void) ksocknal_data.ksnd_connd_starting = 0; ksocknal_data.ksnd_connd_failed_stamp = 0; ksocknal_data.ksnd_connd_starting_stamp = ktime_get_real_seconds(); - /* must have at least 2 connds to remain responsive to accepts while - * connecting */ + /* + * must have at least 2 connds to remain responsive to accepts while + * connecting + */ if (*ksocknal_tunables.ksnd_nconnds < SOCKNAL_CONND_RESV + 1) *ksocknal_tunables.ksnd_nconnds = SOCKNAL_CONND_RESV + 1; @@ -2446,7 +2506,7 @@ ksocknal_base_startup(void) snprintf(name, sizeof(name), "socknal_cd%02d", i); rc = ksocknal_thread_start(ksocknal_connd, (void *)((ulong_ptr_t)i), name); - if (rc != 0) { + if (rc) { spin_lock_bh(&ksocknal_data.ksnd_connd_lock); ksocknal_data.ksnd_connd_starting--; spin_unlock_bh(&ksocknal_data.ksnd_connd_lock); @@ -2456,7 +2516,7 @@ ksocknal_base_startup(void) } rc = ksocknal_thread_start(ksocknal_reaper, NULL, "socknal_reaper"); - if (rc != 0) { + if (rc) { CERROR("Can't spawn socknal reaper: %d\n", rc); goto failed; } @@ -2491,7 +2551,7 @@ ksocknal_debug_peerhash(lnet_ni_t *ni) } } - if (peer != NULL) { + if (peer) { ksock_route_t *route; ksock_conn_t *conn; @@ -2515,9 +2575,9 @@ ksocknal_debug_peerhash(lnet_ni_t *ni) list_for_each(tmp, &peer->ksnp_conns) { conn = list_entry(tmp, ksock_conn_t, ksnc_list); CWARN("Conn: ref %d, sref %d, t %d, c %d\n", - atomic_read(&conn->ksnc_conn_refcount), - atomic_read(&conn->ksnc_sock_refcount), - conn->ksnc_type, conn->ksnc_closing); + atomic_read(&conn->ksnc_conn_refcount), + atomic_read(&conn->ksnc_sock_refcount), + conn->ksnc_type, conn->ksnc_closing); } } @@ -2548,7 +2608,7 @@ ksocknal_shutdown(lnet_ni_t *ni) /* Wait for all peer state to clean up */ i = 2; spin_lock_bh(&net->ksnn_lock); - while (net->ksnn_npeers != 0) { + while (net->ksnn_npeers) { spin_unlock_bh(&net->ksnn_lock); i++; @@ -2565,15 +2625,15 @@ ksocknal_shutdown(lnet_ni_t *ni) 
spin_unlock_bh(&net->ksnn_lock); for (i = 0; i < net->ksnn_ninterfaces; i++) { - LASSERT(net->ksnn_interfaces[i].ksni_npeers == 0); - LASSERT(net->ksnn_interfaces[i].ksni_nroutes == 0); + LASSERT(!net->ksnn_interfaces[i].ksni_npeers); + LASSERT(!net->ksnn_interfaces[i].ksni_nroutes); } list_del(&net->ksnn_list); LIBCFS_FREE(net, sizeof(*net)); ksocknal_data.ksnd_nnets--; - if (ksocknal_data.ksnd_nnets == 0) + if (!ksocknal_data.ksnd_nnets) ksocknal_base_shutdown(); } @@ -2601,7 +2661,7 @@ ksocknal_enumerate_interfaces(ksock_net_t *net) continue; rc = lnet_ipif_query(names[i], &up, &ip, &mask); - if (rc != 0) { + if (rc) { CWARN("Can't get interface %s info: %d\n", names[i], rc); continue; @@ -2628,7 +2688,7 @@ ksocknal_enumerate_interfaces(ksock_net_t *net) lnet_ipif_free_enumeration(names, n); - if (j == 0) + if (!j) CERROR("Can't find any usable interfaces\n"); return j; @@ -2647,21 +2707,20 @@ ksocknal_search_new_ipif(ksock_net_t *net) ksock_net_t *tmp; int j; - if (colon != NULL) /* ignore alias device */ + if (colon) /* ignore alias device */ *colon = 0; - list_for_each_entry(tmp, &ksocknal_data.ksnd_nets, - ksnn_list) { + list_for_each_entry(tmp, &ksocknal_data.ksnd_nets, ksnn_list) { for (j = 0; !found && j < tmp->ksnn_ninterfaces; j++) { char *ifnam2 = &tmp->ksnn_interfaces[j].ksni_name[0]; char *colon2 = strchr(ifnam2, ':'); - if (colon2 != NULL) + if (colon2) *colon2 = 0; - found = strcmp(ifnam, ifnam2) == 0; - if (colon2 != NULL) + found = !strcmp(ifnam, ifnam2); + if (colon2) *colon2 = ':'; } if (found) @@ -2669,7 +2728,7 @@ ksocknal_search_new_ipif(ksock_net_t *net) } new_ipif += !found; - if (colon != NULL) + if (colon) *colon = ':'; } @@ -2683,7 +2742,7 @@ ksocknal_start_schedulers(struct ksock_sched_info *info) int rc = 0; int i; - if (info->ksi_nthreads == 0) { + if (!info->ksi_nthreads) { if (*ksocknal_tunables.ksnd_nscheds > 0) { nthrs = info->ksi_nthreads_max; } else { @@ -2711,7 +2770,7 @@ ksocknal_start_schedulers(struct ksock_sched_info *info) rc = ksocknal_thread_start(ksocknal_scheduler, (void *)id, name); - if (rc == 0) + if (!rc) continue; CERROR("Can't spawn thread %d for scheduler[%d]: %d\n", @@ -2734,7 +2793,7 @@ ksocknal_net_start_threads(ksock_net_t *net, __u32 *cpts, int ncpts) for (i = 0; i < ncpts; i++) { struct ksock_sched_info *info; - int cpt = (cpts == NULL) ? i : cpts[i]; + int cpt = !cpts ? 
i : cpts[i]; LASSERT(cpt < cfs_cpt_number(lnet_cpt_table())); info = ksocknal_data.ksnd_sched_info[cpt]; @@ -2743,7 +2802,7 @@ ksocknal_net_start_threads(ksock_net_t *net, __u32 *cpts, int ncpts) continue; rc = ksocknal_start_schedulers(info); - if (rc != 0) + if (rc) return rc; } return 0; @@ -2760,12 +2819,12 @@ ksocknal_startup(lnet_ni_t *ni) if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) { rc = ksocknal_base_startup(); - if (rc != 0) + if (rc) return rc; } LIBCFS_ALLOC(net, sizeof(*net)); - if (net == NULL) + if (!net) goto fail_0; spin_lock_init(&net->ksnn_lock); @@ -2776,7 +2835,7 @@ ksocknal_startup(lnet_ni_t *ni) ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peertxcredits; ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits; - if (ni->ni_interfaces[0] == NULL) { + if (!ni->ni_interfaces[0]) { rc = ksocknal_enumerate_interfaces(net); if (rc <= 0) goto fail_1; @@ -2786,14 +2845,14 @@ ksocknal_startup(lnet_ni_t *ni) for (i = 0; i < LNET_MAX_INTERFACES; i++) { int up; - if (ni->ni_interfaces[i] == NULL) + if (!ni->ni_interfaces[i]) break; rc = lnet_ipif_query(ni->ni_interfaces[i], &up, - &net->ksnn_interfaces[i].ksni_ipaddr, - &net->ksnn_interfaces[i].ksni_netmask); + &net->ksnn_interfaces[i].ksni_ipaddr, + &net->ksnn_interfaces[i].ksni_netmask); - if (rc != 0) { + if (rc) { CERROR("Can't get interface %s info: %d\n", ni->ni_interfaces[i], rc); goto fail_1; @@ -2814,7 +2873,7 @@ ksocknal_startup(lnet_ni_t *ni) /* call it before add it to ksocknal_data.ksnd_nets */ rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts); - if (rc != 0) + if (rc) goto fail_1; ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), @@ -2828,20 +2887,18 @@ ksocknal_startup(lnet_ni_t *ni) fail_1: LIBCFS_FREE(net, sizeof(*net)); fail_0: - if (ksocknal_data.ksnd_nnets == 0) + if (!ksocknal_data.ksnd_nnets) ksocknal_base_shutdown(); return -ENETDOWN; } -static void __exit -ksocknal_module_fini(void) +static void __exit ksocklnd_exit(void) { lnet_unregister_lnd(&the_ksocklnd); } -static int __init -ksocknal_module_init(void) +static int __init ksocklnd_init(void) { int rc; @@ -2861,7 +2918,7 @@ ksocknal_module_init(void) the_ksocklnd.lnd_accept = ksocknal_accept; rc = ksocknal_tunables_init(); - if (rc != 0) + if (rc) return rc; lnet_register_lnd(&the_ksocklnd); @@ -2870,9 +2927,9 @@ ksocknal_module_init(void) } MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>"); -MODULE_DESCRIPTION("Kernel TCP Socket LND v3.0.0"); +MODULE_DESCRIPTION("TCP Socket LNet Network Driver"); +MODULE_VERSION("2.7.0"); MODULE_LICENSE("GPL"); -MODULE_VERSION("3.0.0"); -module_init(ksocknal_module_init); -module_exit(ksocknal_module_fini); +module_init(ksocklnd_init); +module_exit(ksocklnd_exit); diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h index f4fa72550..a60d72f94 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h @@ -19,10 +19,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * */ #ifndef _SOCKLND_SOCKLND_H_ @@ -69,8 +65,10 @@ #define SOCKNAL_VERSION_DEBUG 0 /* enable protocol version debugging */ -/* risk kmap deadlock on multi-frag I/O (backs off to single-frag if disabled). 
- * no risk if we're not running on a CONFIG_HIGHMEM platform. */ +/* + * risk kmap deadlock on multi-frag I/O (backs off to single-frag if disabled). + * no risk if we're not running on a CONFIG_HIGHMEM platform. + */ #ifdef CONFIG_HIGHMEM # define SOCKNAL_RISK_KMAP_DEADLOCK 0 #else @@ -237,15 +235,16 @@ typedef struct { #define SOCKNAL_INIT_DATA 1 #define SOCKNAL_INIT_ALL 2 -/* A packet just assembled for transmission is represented by 1 or more +/* + * A packet just assembled for transmission is represented by 1 or more * struct iovec fragments (the first frag contains the portals header), * followed by 0 or more lnet_kiov_t fragments. * * On the receive side, initially 1 struct iovec fragment is posted for * receive (the header). Once the header has been received, the payload is * received into either struct iovec or lnet_kiov_t fragments, depending on - * what the header matched or whether the message needs forwarding. */ - + * what the header matched or whether the message needs forwarding. + */ struct ksock_conn; /* forward ref */ struct ksock_peer; /* forward ref */ struct ksock_route; /* forward ref */ @@ -284,12 +283,14 @@ typedef struct /* transmit packet */ } tx_frags; } ksock_tx_t; -#define KSOCK_NOOP_TX_SIZE ((int)offsetof(ksock_tx_t, tx_frags.paged.kiov[0])) +#define KSOCK_NOOP_TX_SIZE (offsetof(ksock_tx_t, tx_frags.paged.kiov[0])) /* network zero copy callback descriptor embedded in ksock_tx_t */ -/* space for the rx frag descriptors; we either read a single contiguous - * header, or up to LNET_MAX_IOV frags of payload of either type. */ +/* + * space for the rx frag descriptors; we either read a single contiguous + * header, or up to LNET_MAX_IOV frags of payload of either type. + */ typedef union { struct kvec iov[LNET_MAX_IOV]; lnet_kiov_t kiov[LNET_MAX_IOV]; @@ -463,11 +464,13 @@ typedef struct ksock_proto { /* handle ZC ACK */ int (*pro_handle_zcack)(ksock_conn_t *, __u64, __u64); - /* msg type matches the connection type: + /* + * msg type matches the connection type: * return value: * return MATCH_NO : no * return MATCH_YES : matching type - * return MATCH_MAY : can be backup */ + * return MATCH_MAY : can be backup + */ int (*pro_match_tx)(ksock_conn_t *, ksock_tx_t *, int); } ksock_proto_t; diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c index 477b385f1..976fd7892 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c @@ -19,9 +19,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ #include "socklnd.h" @@ -47,10 +44,10 @@ ksocknal_alloc_tx(int type, int size) spin_unlock(&ksocknal_data.ksnd_tx_lock); } - if (tx == NULL) + if (!tx) LIBCFS_ALLOC(tx, size); - if (tx == NULL) + if (!tx) return NULL; atomic_set(&tx->tx_refcount, 1); @@ -70,7 +67,7 @@ ksocknal_alloc_tx_noop(__u64 cookie, int nonblk) ksock_tx_t *tx; tx = ksocknal_alloc_tx(KSOCK_MSG_NOOP, KSOCK_NOOP_TX_SIZE); - if (tx == NULL) { + if (!tx) { CERROR("Can't allocate noop tx desc\n"); return NULL; } @@ -90,11 +87,11 @@ ksocknal_alloc_tx_noop(__u64 cookie, int nonblk) } void -ksocknal_free_tx (ksock_tx_t *tx) +ksocknal_free_tx(ksock_tx_t *tx) { atomic_dec(&ksocknal_data.ksnd_nactive_txs); - if (tx->tx_lnetmsg == NULL && tx->tx_desc_size == KSOCK_NOOP_TX_SIZE) { + if (!tx->tx_lnetmsg && tx->tx_desc_size == KSOCK_NOOP_TX_SIZE) { /* it's a noop tx */ spin_lock(&ksocknal_data.ksnd_tx_lock); @@ -107,7 +104,7 @@ ksocknal_free_tx (ksock_tx_t *tx) } static int -ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) +ksocknal_send_iov(ksock_conn_t *conn, ksock_tx_t *tx) { struct kvec *iov = tx->tx_iov; int nob; @@ -122,7 +119,7 @@ ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) return rc; nob = rc; - LASSERT (nob <= tx->tx_resid); + LASSERT(nob <= tx->tx_resid); tx->tx_resid -= nob; /* "consume" iov */ @@ -138,19 +135,19 @@ ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) nob -= iov->iov_len; tx->tx_iov = ++iov; tx->tx_niov--; - } while (nob != 0); + } while (nob); return rc; } static int -ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) +ksocknal_send_kiov(ksock_conn_t *conn, ksock_tx_t *tx) { lnet_kiov_t *kiov = tx->tx_kiov; int nob; int rc; - LASSERT(tx->tx_niov == 0); + LASSERT(!tx->tx_niov); LASSERT(tx->tx_nkiov > 0); /* Never touch tx->tx_kiov inside ksocknal_lib_send_kiov() */ @@ -160,7 +157,7 @@ ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) return rc; nob = rc; - LASSERT (nob <= tx->tx_resid); + LASSERT(nob <= tx->tx_resid); tx->tx_resid -= nob; /* "consume" kiov */ @@ -176,27 +173,27 @@ ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) nob -= (int)kiov->kiov_len; tx->tx_kiov = ++kiov; tx->tx_nkiov--; - } while (nob != 0); + } while (nob); return rc; } static int -ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) +ksocknal_transmit(ksock_conn_t *conn, ksock_tx_t *tx) { int rc; int bufnob; - if (ksocknal_data.ksnd_stall_tx != 0) { + if (ksocknal_data.ksnd_stall_tx) { set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(cfs_time_seconds(ksocknal_data.ksnd_stall_tx)); } - LASSERT(tx->tx_resid != 0); + LASSERT(tx->tx_resid); rc = ksocknal_connsock_addref(conn); - if (rc != 0) { - LASSERT (conn->ksnc_closing); + if (rc) { + LASSERT(conn->ksnc_closing); return -ESHUTDOWN; } @@ -205,10 +202,10 @@ ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) /* testing... 
*/ ksocknal_data.ksnd_enomem_tx--; rc = -EAGAIN; - } else if (tx->tx_niov != 0) { - rc = ksocknal_send_iov (conn, tx); + } else if (tx->tx_niov) { + rc = ksocknal_send_iov(conn, tx); } else { - rc = ksocknal_send_kiov (conn, tx); + rc = ksocknal_send_kiov(conn, tx); } bufnob = conn->ksnc_sock->sk->sk_wmem_queued; @@ -216,8 +213,10 @@ ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) conn->ksnc_tx_bufnob += rc; /* account it */ if (bufnob < conn->ksnc_tx_bufnob) { - /* allocated send buffer bytes < computed; infer - * something got ACKed */ + /* + * allocated send buffer bytes < computed; infer + * something got ACKed + */ conn->ksnc_tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout); conn->ksnc_peer->ksnp_last_alive = cfs_time_current(); @@ -227,7 +226,7 @@ ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) if (rc <= 0) { /* Didn't write anything? */ - if (rc == 0) /* some stacks return 0 instead of -EAGAIN */ + if (!rc) /* some stacks return 0 instead of -EAGAIN */ rc = -EAGAIN; /* Check if EAGAIN is due to memory pressure */ @@ -238,17 +237,17 @@ ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) } /* socket's wmem_queued now includes 'rc' bytes */ - atomic_sub (rc, &conn->ksnc_tx_nob); + atomic_sub(rc, &conn->ksnc_tx_nob); rc = 0; - } while (tx->tx_resid != 0); + } while (tx->tx_resid); ksocknal_connsock_decref(conn); return rc; } static int -ksocknal_recv_iov (ksock_conn_t *conn) +ksocknal_recv_iov(ksock_conn_t *conn) { struct kvec *iov = conn->ksnc_rx_iov; int nob; @@ -256,8 +255,10 @@ ksocknal_recv_iov (ksock_conn_t *conn) LASSERT(conn->ksnc_rx_niov > 0); - /* Never touch conn->ksnc_rx_iov or change connection - * status inside ksocknal_lib_recv_iov */ + /* + * Never touch conn->ksnc_rx_iov or change connection + * status inside ksocknal_lib_recv_iov + */ rc = ksocknal_lib_recv_iov(conn); if (rc <= 0) @@ -287,13 +288,13 @@ ksocknal_recv_iov (ksock_conn_t *conn) nob -= iov->iov_len; conn->ksnc_rx_iov = ++iov; conn->ksnc_rx_niov--; - } while (nob != 0); + } while (nob); return rc; } static int -ksocknal_recv_kiov (ksock_conn_t *conn) +ksocknal_recv_kiov(ksock_conn_t *conn) { lnet_kiov_t *kiov = conn->ksnc_rx_kiov; int nob; @@ -301,8 +302,10 @@ ksocknal_recv_kiov (ksock_conn_t *conn) LASSERT(conn->ksnc_rx_nkiov > 0); - /* Never touch conn->ksnc_rx_kiov or change connection - * status inside ksocknal_lib_recv_iov */ + /* + * Never touch conn->ksnc_rx_kiov or change connection + * status inside ksocknal_lib_recv_iov + */ rc = ksocknal_lib_recv_kiov(conn); if (rc <= 0) @@ -332,41 +335,43 @@ ksocknal_recv_kiov (ksock_conn_t *conn) nob -= kiov->kiov_len; conn->ksnc_rx_kiov = ++kiov; conn->ksnc_rx_nkiov--; - } while (nob != 0); + } while (nob); return 1; } static int -ksocknal_receive (ksock_conn_t *conn) +ksocknal_receive(ksock_conn_t *conn) { - /* Return 1 on success, 0 on EOF, < 0 on error. + /* + * Return 1 on success, 0 on EOF, < 0 on error. * Caller checks ksnc_rx_nob_wanted to determine - * progress/completion. */ + * progress/completion. 
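The convention restated above (> 0 means progress, 0 means EOF, < 0 an error, with the caller inferring completion from the wanted-byte count) maps directly onto a plain POSIX read(); EAGAIN is folded into "progress" exactly as ksocknal_receive() folds it. A minimal sketch with invented names:

#include <errno.h>
#include <stdio.h>
#include <unistd.h>

/* Returns 1 on progress (or would-block), 0 on EOF, -errno on error. */
static int receive_some(int fd, void *buf, size_t *wanted)
{
        ssize_t rc = read(fd, buf, *wanted);

        if (rc > 0) {
                *wanted -= (size_t)rc;  /* caller re-checks *wanted */
                return 1;
        }
        if (rc == 0)
                return 0;               /* EOF */
        if (errno == EAGAIN || errno == EWOULDBLOCK)
                return 1;               /* no data yet; not an error */
        return -errno;
}

int main(void)
{
        int p[2];
        char buf[8];
        size_t wanted = sizeof(buf);

        if (pipe(p))
                return 1;
        (void)!write(p[1], "hi", 2);
        printf("rc=%d, still wanted=%zu\n",
               receive_some(p[0], buf, &wanted), wanted);
        return 0;
}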
+ */ int rc; - if (ksocknal_data.ksnd_stall_rx != 0) { + if (ksocknal_data.ksnd_stall_rx) { set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(cfs_time_seconds(ksocknal_data.ksnd_stall_rx)); } rc = ksocknal_connsock_addref(conn); - if (rc != 0) { - LASSERT (conn->ksnc_closing); + if (rc) { + LASSERT(conn->ksnc_closing); return -ESHUTDOWN; } for (;;) { - if (conn->ksnc_rx_niov != 0) - rc = ksocknal_recv_iov (conn); + if (conn->ksnc_rx_niov) + rc = ksocknal_recv_iov(conn); else - rc = ksocknal_recv_kiov (conn); + rc = ksocknal_recv_kiov(conn); if (rc <= 0) { /* error/EOF or partial receive */ if (rc == -EAGAIN) { rc = 1; - } else if (rc == 0 && conn->ksnc_rx_started) { + } else if (!rc && conn->ksnc_rx_started) { /* EOF in the middle of a message */ rc = -EPROTO; } @@ -375,7 +380,7 @@ ksocknal_receive (ksock_conn_t *conn) /* Completed a fragment */ - if (conn->ksnc_rx_nob_wanted == 0) { + if (!conn->ksnc_rx_nob_wanted) { rc = 1; break; } @@ -386,36 +391,36 @@ ksocknal_receive (ksock_conn_t *conn) } void -ksocknal_tx_done (lnet_ni_t *ni, ksock_tx_t *tx) +ksocknal_tx_done(lnet_ni_t *ni, ksock_tx_t *tx) { lnet_msg_t *lnetmsg = tx->tx_lnetmsg; - int rc = (tx->tx_resid == 0 && !tx->tx_zc_aborted) ? 0 : -EIO; + int rc = (!tx->tx_resid && !tx->tx_zc_aborted) ? 0 : -EIO; - LASSERT(ni != NULL || tx->tx_conn != NULL); + LASSERT(ni || tx->tx_conn); - if (tx->tx_conn != NULL) + if (tx->tx_conn) ksocknal_conn_decref(tx->tx_conn); - if (ni == NULL && tx->tx_conn != NULL) + if (!ni && tx->tx_conn) ni = tx->tx_conn->ksnc_peer->ksnp_ni; - ksocknal_free_tx (tx); - if (lnetmsg != NULL) /* KSOCK_MSG_NOOP go without lnetmsg */ - lnet_finalize (ni, lnetmsg, rc); + ksocknal_free_tx(tx); + if (lnetmsg) /* KSOCK_MSG_NOOP go without lnetmsg */ + lnet_finalize(ni, lnetmsg, rc); } void -ksocknal_txlist_done (lnet_ni_t *ni, struct list_head *txlist, int error) +ksocknal_txlist_done(lnet_ni_t *ni, struct list_head *txlist, int error) { ksock_tx_t *tx; - while (!list_empty (txlist)) { + while (!list_empty(txlist)) { tx = list_entry(txlist->next, ksock_tx_t, tx_list); - if (error && tx->tx_lnetmsg != NULL) { + if (error && tx->tx_lnetmsg) { CNETERR("Deleting packet type %d len %d %s->%s\n", - le32_to_cpu (tx->tx_lnetmsg->msg_hdr.type), - le32_to_cpu (tx->tx_lnetmsg->msg_hdr.payload_length), + le32_to_cpu(tx->tx_lnetmsg->msg_hdr.type), + le32_to_cpu(tx->tx_lnetmsg->msg_hdr.payload_length), libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.src_nid)), libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.dest_nid))); } else if (error) { @@ -435,12 +440,14 @@ ksocknal_check_zc_req(ksock_tx_t *tx) ksock_conn_t *conn = tx->tx_conn; ksock_peer_t *peer = conn->ksnc_peer; - /* Set tx_msg.ksm_zc_cookies[0] to a unique non-zero cookie and add tx + /* + * Set tx_msg.ksm_zc_cookies[0] to a unique non-zero cookie and add tx * to ksnp_zc_req_list if some fragment of this message should be sent * zero-copy. Our peer will send an ACK containing this cookie when * she has received this message to tell us we can signal completion. * tx_msg.ksm_zc_cookies[0] remains non-zero while tx is on - * ksnp_zc_req_list. */ + * ksnp_zc_req_list. + */ LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); LASSERT(tx->tx_zc_capable); @@ -450,9 +457,10 @@ ksocknal_check_zc_req(ksock_tx_t *tx) !conn->ksnc_zc_capable) return; - /* assign cookie and queue tx to pending list, it will be released when - * a matching ack is received. 
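The scheme described above relies on cookie 0 meaning "no zero-copy ACK expected", so the per-peer counter must never hand out 0 (nor the reserved keepalive value) even after wrapping, which is what the wraparound check in the next hunk does. A sketch of such an allocator; the reserved value is an assumption for illustration:

#include <stdint.h>
#include <stdio.h>

#define KEEPALIVE_PING 1ULL             /* reserved cookie (assumed value) */

static uint64_t zc_next_cookie = KEEPALIVE_PING + 1;

static uint64_t zc_assign_cookie(void)
{
        uint64_t cookie = zc_next_cookie++;

        if (!zc_next_cookie)            /* wrapped: skip 0 and keepalive */
                zc_next_cookie = KEEPALIVE_PING + 1;
        return cookie;
}

int main(void)
{
        printf("cookies: %llu %llu\n",
               (unsigned long long)zc_assign_cookie(),
               (unsigned long long)zc_assign_cookie());
        return 0;
}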
See ksocknal_handle_zcack() */ - + /* + * assign cookie and queue tx to pending list, it will be released when + * a matching ack is received. See ksocknal_handle_zcack() + */ ksocknal_tx_addref(tx); spin_lock(&peer->ksnp_lock); @@ -461,11 +469,11 @@ ksocknal_check_zc_req(ksock_tx_t *tx) tx->tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - LASSERT(tx->tx_msg.ksm_zc_cookies[0] == 0); + LASSERT(!tx->tx_msg.ksm_zc_cookies[0]); tx->tx_msg.ksm_zc_cookies[0] = peer->ksnp_zc_next_cookie++; - if (peer->ksnp_zc_next_cookie == 0) + if (!peer->ksnp_zc_next_cookie) peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1; list_add_tail(&tx->tx_zc_list, &peer->ksnp_zc_req_list); @@ -485,7 +493,7 @@ ksocknal_uncheck_zc_req(ksock_tx_t *tx) spin_lock(&peer->ksnp_lock); - if (tx->tx_msg.ksm_zc_cookies[0] == 0) { + if (!tx->tx_msg.ksm_zc_cookies[0]) { /* Not waiting for an ACK */ spin_unlock(&peer->ksnp_lock); return; @@ -500,20 +508,20 @@ ksocknal_uncheck_zc_req(ksock_tx_t *tx) } static int -ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx) +ksocknal_process_transmit(ksock_conn_t *conn, ksock_tx_t *tx) { int rc; if (tx->tx_zc_capable && !tx->tx_zc_checked) ksocknal_check_zc_req(tx); - rc = ksocknal_transmit (conn, tx); + rc = ksocknal_transmit(conn, tx); CDEBUG(D_NET, "send(%d) %d\n", tx->tx_resid, rc); - if (tx->tx_resid == 0) { + if (!tx->tx_resid) { /* Sent everything OK */ - LASSERT (rc == 0); + LASSERT(!rc); return 0; } @@ -532,13 +540,13 @@ ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx) spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); /* enomem list takes over scheduler's ref... */ - LASSERT (conn->ksnc_tx_scheduled); + LASSERT(conn->ksnc_tx_scheduled); list_add_tail(&conn->ksnc_tx_list, - &ksocknal_data.ksnd_enomem_conns); + &ksocknal_data.ksnd_enomem_conns); if (!cfs_time_aftereq(cfs_time_add(cfs_time_current(), SOCKNAL_ENOMEM_RETRY), ksocknal_data.ksnd_reaper_waketime)) - wake_up (&ksocknal_data.ksnd_reaper_waitq); + wake_up(&ksocknal_data.ksnd_reaper_waitq); spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock); return rc; @@ -569,21 +577,19 @@ ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx) ksocknal_uncheck_zc_req(tx); /* it's not an error if conn is being closed */ - ksocknal_close_conn_and_siblings (conn, - (conn->ksnc_closing) ? 0 : rc); + ksocknal_close_conn_and_siblings(conn, (conn->ksnc_closing) ? 
0 : rc); return rc; } static void -ksocknal_launch_connection_locked (ksock_route_t *route) +ksocknal_launch_connection_locked(ksock_route_t *route) { - /* called holding write lock on ksnd_global_lock */ LASSERT(!route->ksnr_scheduled); LASSERT(!route->ksnr_connecting); - LASSERT((ksocknal_route_mask() & ~route->ksnr_connected) != 0); + LASSERT(ksocknal_route_mask() & ~route->ksnr_connected); route->ksnr_scheduled = 1; /* scheduling conn for connd */ ksocknal_route_addref(route); /* extra ref for connd */ @@ -591,14 +597,14 @@ ksocknal_launch_connection_locked (ksock_route_t *route) spin_lock_bh(&ksocknal_data.ksnd_connd_lock); list_add_tail(&route->ksnr_connd_list, - &ksocknal_data.ksnd_connd_routes); + &ksocknal_data.ksnd_connd_routes); wake_up(&ksocknal_data.ksnd_connd_waitq); spin_unlock_bh(&ksocknal_data.ksnd_connd_lock); } void -ksocknal_launch_all_connections_locked (ksock_peer_t *peer) +ksocknal_launch_all_connections_locked(ksock_peer_t *peer) { ksock_route_t *route; @@ -606,7 +612,7 @@ ksocknal_launch_all_connections_locked (ksock_peer_t *peer) for (;;) { /* launch any/all connections that need it */ route = ksocknal_find_connectable_route_locked(peer); - if (route == NULL) + if (!route) return; ksocknal_launch_connection_locked(route); @@ -623,15 +629,15 @@ ksocknal_find_conn_locked(ksock_peer_t *peer, ksock_tx_t *tx, int nonblk) int tnob = 0; int fnob = 0; - list_for_each (tmp, &peer->ksnp_conns) { + list_for_each(tmp, &peer->ksnp_conns) { ksock_conn_t *c = list_entry(tmp, ksock_conn_t, ksnc_list); int nob = atomic_read(&c->ksnc_tx_nob) + c->ksnc_sock->sk->sk_wmem_queued; int rc; LASSERT(!c->ksnc_closing); - LASSERT(c->ksnc_proto != NULL && - c->ksnc_proto->pro_match_tx != NULL); + LASSERT(c->ksnc_proto && + c->ksnc_proto->pro_match_tx); rc = c->ksnc_proto->pro_match_tx(c, tx, nonblk); @@ -642,7 +648,7 @@ ksocknal_find_conn_locked(ksock_peer_t *peer, ksock_tx_t *tx, int nonblk) continue; case SOCKNAL_MATCH_YES: /* typed connection */ - if (typed == NULL || tnob > nob || + if (!typed || tnob > nob || (tnob == nob && *ksocknal_tunables.ksnd_round_robin && cfs_time_after(typed->ksnc_tx_last_post, c->ksnc_tx_last_post))) { typed = c; @@ -651,7 +657,7 @@ ksocknal_find_conn_locked(ksock_peer_t *peer, ksock_tx_t *tx, int nonblk) break; case SOCKNAL_MATCH_MAY: /* fallback connection */ - if (fallback == NULL || fnob > nob || + if (!fallback || fnob > nob || (fnob == nob && *ksocknal_tunables.ksnd_round_robin && cfs_time_after(fallback->ksnc_tx_last_post, c->ksnc_tx_last_post))) { fallback = c; @@ -662,9 +668,9 @@ ksocknal_find_conn_locked(ksock_peer_t *peer, ksock_tx_t *tx, int nonblk) } /* prefer the typed selection */ - conn = (typed != NULL) ? typed : fallback; + conn = (typed) ? 
typed : fallback; - if (conn != NULL) + if (conn) conn->ksnc_tx_last_post = cfs_time_current(); return conn; @@ -675,48 +681,51 @@ ksocknal_tx_prep(ksock_conn_t *conn, ksock_tx_t *tx) { conn->ksnc_proto->pro_pack(tx); - atomic_add (tx->tx_nob, &conn->ksnc_tx_nob); + atomic_add(tx->tx_nob, &conn->ksnc_tx_nob); ksocknal_conn_addref(conn); /* +1 ref for tx */ tx->tx_conn = conn; } void -ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn) +ksocknal_queue_tx_locked(ksock_tx_t *tx, ksock_conn_t *conn) { ksock_sched_t *sched = conn->ksnc_scheduler; ksock_msg_t *msg = &tx->tx_msg; ksock_tx_t *ztx = NULL; int bufnob = 0; - /* called holding global lock (read or irq-write) and caller may + /* + * called holding global lock (read or irq-write) and caller may * not have dropped this lock between finding conn and calling me, * so we don't need the {get,put}connsock dance to deref - * ksnc_sock... */ + * ksnc_sock... + */ LASSERT(!conn->ksnc_closing); CDEBUG(D_NET, "Sending to %s ip %pI4h:%d\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - &conn->ksnc_ipaddr, - conn->ksnc_port); + libcfs_id2str(conn->ksnc_peer->ksnp_id), + &conn->ksnc_ipaddr, conn->ksnc_port); ksocknal_tx_prep(conn, tx); - /* Ensure the frags we've been given EXACTLY match the number of + /* + * Ensure the frags we've been given EXACTLY match the number of * bytes we want to send. Many TCP/IP stacks disregard any total * size parameters passed to them and just look at the frags. * * We always expect at least 1 mapped fragment containing the - * complete ksocknal message header. */ - LASSERT(lnet_iov_nob (tx->tx_niov, tx->tx_iov) + + * complete ksocknal message header. + */ + LASSERT(lnet_iov_nob(tx->tx_niov, tx->tx_iov) + lnet_kiov_nob(tx->tx_nkiov, tx->tx_kiov) == (unsigned int)tx->tx_nob); LASSERT(tx->tx_niov >= 1); LASSERT(tx->tx_resid == tx->tx_nob); - CDEBUG (D_NET, "Packet %p type %d, nob %d niov %d nkiov %d\n", - tx, (tx->tx_lnetmsg != NULL) ? tx->tx_lnetmsg->msg_hdr.type : - KSOCK_MSG_NOOP, - tx->tx_nob, tx->tx_niov, tx->tx_nkiov); + CDEBUG(D_NET, "Packet %p type %d, nob %d niov %d nkiov %d\n", + tx, (tx->tx_lnetmsg) ? tx->tx_lnetmsg->msg_hdr.type : + KSOCK_MSG_NOOP, + tx->tx_nob, tx->tx_niov, tx->tx_nkiov); /* * FIXME: SOCK_WMEM_QUEUED and SOCK_ERROR could block in __DARWIN8__ @@ -725,7 +734,7 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn) bufnob = conn->ksnc_sock->sk->sk_wmem_queued; spin_lock_bh(&sched->kss_lock); - if (list_empty(&conn->ksnc_tx_queue) && bufnob == 0) { + if (list_empty(&conn->ksnc_tx_queue) && !bufnob) { /* First packet starts the timeout */ conn->ksnc_tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout); @@ -736,26 +745,30 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn) } if (msg->ksm_type == KSOCK_MSG_NOOP) { - /* The packet is noop ZC ACK, try to piggyback the ack_cookie - * on a normal packet so I don't need to send it */ - LASSERT(msg->ksm_zc_cookies[1] != 0); - LASSERT(conn->ksnc_proto->pro_queue_tx_zcack != NULL); + /* + * The packet is noop ZC ACK, try to piggyback the ack_cookie + * on a normal packet so I don't need to send it + */ + LASSERT(msg->ksm_zc_cookies[1]); + LASSERT(conn->ksnc_proto->pro_queue_tx_zcack); if (conn->ksnc_proto->pro_queue_tx_zcack(conn, tx, 0)) ztx = tx; /* ZC ACK piggybacked on ztx release tx later */ } else { - /* It's a normal packet - can it piggback a noop zc-ack that - * has been queued already? 
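ksocknal_find_conn_locked() above tracks the best SOCKNAL_MATCH_YES ("typed") and SOCKNAL_MATCH_MAY ("fallback") candidates by queued bytes and prefers the typed selection. A compact sketch of that policy, leaving out the round-robin tie-break; the types are illustrative:

#include <stdio.h>

enum { MATCH_NO, MATCH_YES, MATCH_MAY };

struct conn {
        int queued_bytes;
        int match;
};

static struct conn *pick_conn(struct conn *c, int n)
{
        struct conn *typed = NULL, *fallback = NULL;
        int i;

        for (i = 0; i < n; i++) {
                switch (c[i].match) {
                case MATCH_YES:         /* typed: least-loaded wins */
                        if (!typed || typed->queued_bytes > c[i].queued_bytes)
                                typed = &c[i];
                        break;
                case MATCH_MAY:         /* fallback: same rule, lower tier */
                        if (!fallback ||
                            fallback->queued_bytes > c[i].queued_bytes)
                                fallback = &c[i];
                        break;
                }
        }
        return typed ? typed : fallback;        /* prefer the typed one */
}

int main(void)
{
        struct conn c[] = {
                { 4096, MATCH_MAY }, { 512, MATCH_YES }, { 128, MATCH_NO },
        };

        printf("picked conn with %d queued bytes\n",
               pick_conn(c, 3)->queued_bytes);
        return 0;
}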
*/ - LASSERT(msg->ksm_zc_cookies[1] == 0); - LASSERT(conn->ksnc_proto->pro_queue_tx_msg != NULL); + /* + * It's a normal packet - can it piggback a noop zc-ack that + * has been queued already? + */ + LASSERT(!msg->ksm_zc_cookies[1]); + LASSERT(conn->ksnc_proto->pro_queue_tx_msg); ztx = conn->ksnc_proto->pro_queue_tx_msg(conn, tx); /* ztx will be released later */ } - if (ztx != NULL) { - atomic_sub (ztx->tx_nob, &conn->ksnc_tx_nob); + if (ztx) { + atomic_sub(ztx->tx_nob, &conn->ksnc_tx_nob); list_add_tail(&ztx->tx_list, &sched->kss_zombie_noop_txs); } @@ -763,24 +776,23 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn) !conn->ksnc_tx_scheduled) { /* not scheduled to send */ /* +1 ref for scheduler */ ksocknal_conn_addref(conn); - list_add_tail (&conn->ksnc_tx_list, - &sched->kss_tx_conns); + list_add_tail(&conn->ksnc_tx_list, &sched->kss_tx_conns); conn->ksnc_tx_scheduled = 1; - wake_up (&sched->kss_waitq); + wake_up(&sched->kss_waitq); } spin_unlock_bh(&sched->kss_lock); } ksock_route_t * -ksocknal_find_connectable_route_locked (ksock_peer_t *peer) +ksocknal_find_connectable_route_locked(ksock_peer_t *peer) { unsigned long now = cfs_time_current(); struct list_head *tmp; ksock_route_t *route; - list_for_each (tmp, &peer->ksnp_routes) { - route = list_entry (tmp, ksock_route_t, ksnr_list); + list_for_each(tmp, &peer->ksnp_routes) { + route = list_entry(tmp, ksock_route_t, ksnr_list); LASSERT(!route->ksnr_connecting || route->ksnr_scheduled); @@ -788,10 +800,10 @@ ksocknal_find_connectable_route_locked (ksock_peer_t *peer) continue; /* all route types connected ? */ - if ((ksocknal_route_mask() & ~route->ksnr_connected) == 0) + if (!(ksocknal_route_mask() & ~route->ksnr_connected)) continue; - if (!(route->ksnr_retry_interval == 0 || /* first attempt */ + if (!(!route->ksnr_retry_interval || /* first attempt */ cfs_time_aftereq(now, route->ksnr_timeout))) { CDEBUG(D_NET, "Too soon to retry route %pI4h (cnted %d, interval %ld, %ld secs later)\n", @@ -809,13 +821,13 @@ ksocknal_find_connectable_route_locked (ksock_peer_t *peer) } ksock_route_t * -ksocknal_find_connecting_route_locked (ksock_peer_t *peer) +ksocknal_find_connecting_route_locked(ksock_peer_t *peer) { struct list_head *tmp; ksock_route_t *route; - list_for_each (tmp, &peer->ksnp_routes) { - route = list_entry (tmp, ksock_route_t, ksnr_list); + list_for_each(tmp, &peer->ksnp_routes) { + route = list_entry(tmp, ksock_route_t, ksnr_list); LASSERT(!route->ksnr_connecting || route->ksnr_scheduled); @@ -827,7 +839,7 @@ ksocknal_find_connecting_route_locked (ksock_peer_t *peer) } int -ksocknal_launch_packet (lnet_ni_t *ni, ksock_tx_t *tx, lnet_process_id_t id) +ksocknal_launch_packet(lnet_ni_t *ni, ksock_tx_t *tx, lnet_process_id_t id) { ksock_peer_t *peer; ksock_conn_t *conn; @@ -835,21 +847,23 @@ ksocknal_launch_packet (lnet_ni_t *ni, ksock_tx_t *tx, lnet_process_id_t id) int retry; int rc; - LASSERT(tx->tx_conn == NULL); + LASSERT(!tx->tx_conn); g_lock = &ksocknal_data.ksnd_global_lock; for (retry = 0;; retry = 1) { read_lock(g_lock); peer = ksocknal_find_peer_locked(ni, id); - if (peer != NULL) { - if (ksocknal_find_connectable_route_locked(peer) == NULL) { + if (peer) { + if (!ksocknal_find_connectable_route_locked(peer)) { conn = ksocknal_find_conn_locked(peer, tx, tx->tx_nonblk); - if (conn != NULL) { - /* I've got no routes that need to be + if (conn) { + /* + * I've got no routes that need to be * connecting and I do have an actual - * connection... */ - ksocknal_queue_tx_locked (tx, conn); + * connection... 
+ */ + ksocknal_queue_tx_locked(tx, conn); read_unlock(g_lock); return 0; } @@ -862,12 +876,12 @@ ksocknal_launch_packet (lnet_ni_t *ni, ksock_tx_t *tx, lnet_process_id_t id) write_lock_bh(g_lock); peer = ksocknal_find_peer_locked(ni, id); - if (peer != NULL) + if (peer) break; write_unlock_bh(g_lock); - if ((id.pid & LNET_PID_USERFLAG) != 0) { + if (id.pid & LNET_PID_USERFLAG) { CERROR("Refusing to create a connection to userspace process %s\n", libcfs_id2str(id)); return -EHOSTUNREACH; @@ -881,7 +895,7 @@ ksocknal_launch_packet (lnet_ni_t *ni, ksock_tx_t *tx, lnet_process_id_t id) rc = ksocknal_add_peer(ni, id, LNET_NIDADDR(id.nid), lnet_acceptor_port()); - if (rc != 0) { + if (rc) { CERROR("Can't add peer %s: %d\n", libcfs_id2str(id), rc); return rc; @@ -891,21 +905,21 @@ ksocknal_launch_packet (lnet_ni_t *ni, ksock_tx_t *tx, lnet_process_id_t id) ksocknal_launch_all_connections_locked(peer); conn = ksocknal_find_conn_locked(peer, tx, tx->tx_nonblk); - if (conn != NULL) { + if (conn) { /* Connection exists; queue message on it */ - ksocknal_queue_tx_locked (tx, conn); + ksocknal_queue_tx_locked(tx, conn); write_unlock_bh(g_lock); return 0; } if (peer->ksnp_accepting > 0 || - ksocknal_find_connecting_route_locked (peer) != NULL) { + ksocknal_find_connecting_route_locked(peer)) { /* the message is going to be pinned to the peer */ tx->tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout); /* Queue the message until a connection is established */ - list_add_tail (&tx->tx_list, &peer->ksnp_tx_queue); + list_add_tail(&tx->tx_list, &peer->ksnp_tx_queue); write_unlock_bh(g_lock); return 0; } @@ -932,19 +946,20 @@ ksocknal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) int desc_size; int rc; - /* NB 'private' is different depending on what we're sending. - * Just ignore it... */ - + /* + * NB 'private' is different depending on what we're sending. + * Just ignore it... 
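ksocknal_launch_packet() above takes the global lock for reading on the fast path and retries as a writer only when it must create the peer, re-checking after the upgrade because the table may have changed in between. The same pattern with pthread rwlocks standing in for the kernel's (sketch only; link with -pthread):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t g_lock = PTHREAD_RWLOCK_INITIALIZER;
static int have_peer;                   /* stand-in for the peer table */

static int find_peer_locked(void)
{
        return have_peer;
}

static void launch(void)
{
        pthread_rwlock_rdlock(&g_lock);
        if (find_peer_locked()) {
                /* fast path: queue on the existing peer */
                pthread_rwlock_unlock(&g_lock);
                return;
        }
        pthread_rwlock_unlock(&g_lock);

        pthread_rwlock_wrlock(&g_lock);
        if (!find_peer_locked())        /* re-check: someone may have raced */
                have_peer = 1;          /* "add peer" under the write lock */
        pthread_rwlock_unlock(&g_lock);
        printf("peer present after write-locked re-check\n");
}

int main(void)
{
        launch();
        return 0;
}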
+ */ CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n", payload_nob, payload_niov, libcfs_id2str(target)); - LASSERT(payload_nob == 0 || payload_niov > 0); + LASSERT(!payload_nob || payload_niov > 0); LASSERT(payload_niov <= LNET_MAX_IOV); /* payload is either all vaddrs or all pages */ - LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - LASSERT (!in_interrupt ()); + LASSERT(!(payload_kiov && payload_iov)); + LASSERT(!in_interrupt()); - if (payload_iov != NULL) + if (payload_iov) desc_size = offsetof(ksock_tx_t, tx_frags.virt.iov[1 + payload_niov]); else @@ -954,7 +969,7 @@ ksocknal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) if (lntmsg->msg_vmflush) mpflag = cfs_memory_pressure_get_and_set(); tx = ksocknal_alloc_tx(KSOCK_MSG_LNET, desc_size); - if (tx == NULL) { + if (!tx) { CERROR("Can't allocate tx desc type %d size %d\n", type, desc_size); if (lntmsg->msg_vmflush) @@ -965,7 +980,7 @@ ksocknal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) tx->tx_conn = NULL; /* set when assigned a conn */ tx->tx_lnetmsg = lntmsg; - if (payload_iov != NULL) { + if (payload_iov) { tx->tx_kiov = NULL; tx->tx_nkiov = 0; tx->tx_iov = tx->tx_frags.virt.iov; @@ -992,7 +1007,7 @@ ksocknal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) if (!mpflag) cfs_memory_pressure_restore(mpflag); - if (rc == 0) + if (!rc) return 0; ksocknal_free_tx(tx); @@ -1014,7 +1029,7 @@ ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name) } void -ksocknal_thread_fini (void) +ksocknal_thread_fini(void) { write_lock_bh(&ksocknal_data.ksnd_global_lock); ksocknal_data.ksnd_nthreads--; @@ -1022,7 +1037,7 @@ ksocknal_thread_fini (void) } int -ksocknal_new_packet (ksock_conn_t *conn, int nob_to_skip) +ksocknal_new_packet(ksock_conn_t *conn, int nob_to_skip) { static char ksocknal_slop_buffer[4096]; @@ -1030,14 +1045,14 @@ ksocknal_new_packet (ksock_conn_t *conn, int nob_to_skip) unsigned int niov; int skipped; - LASSERT(conn->ksnc_proto != NULL); + LASSERT(conn->ksnc_proto); - if ((*ksocknal_tunables.ksnd_eager_ack & conn->ksnc_type) != 0) { + if (*ksocknal_tunables.ksnd_eager_ack & conn->ksnc_type) { /* Remind the socket to ack eagerly... */ ksocknal_lib_eager_ack(conn); } - if (nob_to_skip == 0) { /* right at next packet boundary now */ + if (!nob_to_skip) { /* right at next packet boundary now */ conn->ksnc_rx_started = 0; mb(); /* racing with timeout thread */ @@ -1061,11 +1076,11 @@ ksocknal_new_packet (ksock_conn_t *conn, int nob_to_skip) conn->ksnc_rx_iov = (struct kvec *)&conn->ksnc_rx_iov_space; conn->ksnc_rx_iov[0].iov_base = &conn->ksnc_msg.ksm_u.lnetmsg; - conn->ksnc_rx_iov[0].iov_len = sizeof (lnet_hdr_t); + conn->ksnc_rx_iov[0].iov_len = sizeof(lnet_hdr_t); break; default: - LBUG (); + LBUG(); } conn->ksnc_rx_niov = 1; @@ -1075,9 +1090,10 @@ ksocknal_new_packet (ksock_conn_t *conn, int nob_to_skip) return 1; } - /* Set up to skip as much as possible now. If there's more left - * (ran out of iov entries) we'll get called again */ - + /* + * Set up to skip as much as possible now. 
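The desc_size computation above sizes the tx descriptor with offsetof() into a trailing fragment array, so one allocation covers the fixed header plus exactly 1 + payload_niov fragment slots. The idiom in isolation (struct names invented):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct frag {
        void *base;
        size_t len;
};

struct tx_desc {
        int nfrags;
        struct frag frags[1];           /* frags[0] carries the header */
};

static struct tx_desc *tx_alloc(int payload_frags)
{
        /* room for the header slot plus payload_frags payload slots */
        size_t size = offsetof(struct tx_desc, frags[1 + payload_frags]);
        struct tx_desc *tx = malloc(size);

        if (tx)
                tx->nfrags = 1 + payload_frags;
        return tx;
}

int main(void)
{
        struct tx_desc *tx = tx_alloc(4);

        if (!tx)
                return 1;
        printf("%d frags in a %zu-byte descriptor\n",
               tx->nfrags, offsetof(struct tx_desc, frags[5]));
        free(tx);
        return 0;
}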
If there's more left + * (ran out of iov entries) we'll get called again + */ conn->ksnc_rx_state = SOCKNAL_RX_SLOP; conn->ksnc_rx_nob_left = nob_to_skip; conn->ksnc_rx_iov = (struct kvec *)&conn->ksnc_rx_iov_space; @@ -1093,8 +1109,8 @@ ksocknal_new_packet (ksock_conn_t *conn, int nob_to_skip) skipped += nob; nob_to_skip -= nob; - } while (nob_to_skip != 0 && /* mustn't overflow conn's rx iov */ - niov < sizeof(conn->ksnc_rx_iov_space) / sizeof (struct iovec)); + } while (nob_to_skip && /* mustn't overflow conn's rx iov */ + niov < sizeof(conn->ksnc_rx_iov_space) / sizeof(struct iovec)); conn->ksnc_rx_niov = niov; conn->ksnc_rx_kiov = NULL; @@ -1104,13 +1120,13 @@ ksocknal_new_packet (ksock_conn_t *conn, int nob_to_skip) } static int -ksocknal_process_receive (ksock_conn_t *conn) +ksocknal_process_receive(ksock_conn_t *conn) { lnet_hdr_t *lhdr; lnet_process_id_t *id; int rc; - LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0); + LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0); /* NB: sched lock NOT held */ /* SOCKNAL_RX_LNET_HEADER is here for backward compatibility */ @@ -1119,13 +1135,13 @@ ksocknal_process_receive (ksock_conn_t *conn) conn->ksnc_rx_state == SOCKNAL_RX_LNET_HEADER || conn->ksnc_rx_state == SOCKNAL_RX_SLOP); again: - if (conn->ksnc_rx_nob_wanted != 0) { + if (conn->ksnc_rx_nob_wanted) { rc = ksocknal_receive(conn); if (rc <= 0) { - LASSERT (rc != -EAGAIN); + LASSERT(rc != -EAGAIN); - if (rc == 0) + if (!rc) CDEBUG(D_NET, "[%p] EOF from %s ip %pI4h:%d\n", conn, libcfs_id2str(conn->ksnc_peer->ksnp_id), @@ -1139,12 +1155,12 @@ ksocknal_process_receive (ksock_conn_t *conn) conn->ksnc_port); /* it's not an error if conn is being closed */ - ksocknal_close_conn_and_siblings (conn, - (conn->ksnc_closing) ? 0 : rc); - return (rc == 0 ? -ESHUTDOWN : rc); + ksocknal_close_conn_and_siblings(conn, + (conn->ksnc_closing) ? 0 : rc); + return (!rc ? 
-ESHUTDOWN : rc); } - if (conn->ksnc_rx_nob_wanted != 0) { + if (conn->ksnc_rx_nob_wanted) { /* short read */ return -EAGAIN; } @@ -1169,7 +1185,7 @@ ksocknal_process_receive (ksock_conn_t *conn) } if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP && - conn->ksnc_msg.ksm_csum != 0 && /* has checksum */ + conn->ksnc_msg.ksm_csum && /* has checksum */ conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) { /* NOOP Checksum error */ CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n", @@ -1180,10 +1196,10 @@ ksocknal_process_receive (ksock_conn_t *conn) return -EIO; } - if (conn->ksnc_msg.ksm_zc_cookies[1] != 0) { + if (conn->ksnc_msg.ksm_zc_cookies[1]) { __u64 cookie = 0; - LASSERT (conn->ksnc_proto != &ksocknal_protocol_v1x); + LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x); if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP) cookie = conn->ksnc_msg.ksm_zc_cookies[0]; @@ -1191,7 +1207,7 @@ ksocknal_process_receive (ksock_conn_t *conn) rc = conn->ksnc_proto->pro_handle_zcack(conn, cookie, conn->ksnc_msg.ksm_zc_cookies[1]); - if (rc != 0) { + if (rc) { CERROR("%s: Unknown ZC-ACK cookie: %llu, %llu\n", libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie, conn->ksnc_msg.ksm_zc_cookies[1]); @@ -1202,7 +1218,7 @@ ksocknal_process_receive (ksock_conn_t *conn) } if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP) { - ksocknal_new_packet (conn, 0); + ksocknal_new_packet(conn, 0); return 0; /* NOOP is done and just return */ } @@ -1224,7 +1240,7 @@ ksocknal_process_receive (ksock_conn_t *conn) /* unpack message header */ conn->ksnc_proto->pro_unpack(&conn->ksnc_msg); - if ((conn->ksnc_peer->ksnp_id.pid & LNET_PID_USERFLAG) != 0) { + if (conn->ksnc_peer->ksnp_id.pid & LNET_PID_USERFLAG) { /* Userspace peer */ lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr; id = &conn->ksnc_peer->ksnp_id; @@ -1243,14 +1259,14 @@ ksocknal_process_receive (ksock_conn_t *conn) if (rc < 0) { /* I just received garbage: give up on this conn */ ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings (conn, rc); + ksocknal_close_conn_and_siblings(conn, rc); ksocknal_conn_decref(conn); return -EPROTO; } /* I'm racing with ksocknal_recv() */ - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_PARSE || - conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD); + LASSERT(conn->ksnc_rx_state == SOCKNAL_RX_PARSE || + conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD); if (conn->ksnc_rx_state != SOCKNAL_RX_LNET_PAYLOAD) return 0; @@ -1262,8 +1278,8 @@ ksocknal_process_receive (ksock_conn_t *conn) /* payload all received */ rc = 0; - if (conn->ksnc_rx_nob_left == 0 && /* not truncating */ - conn->ksnc_msg.ksm_csum != 0 && /* has checksum */ + if (!conn->ksnc_rx_nob_left && /* not truncating */ + conn->ksnc_msg.ksm_csum && /* has checksum */ conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) { CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n", libcfs_id2str(conn->ksnc_peer->ksnp_id), @@ -1271,7 +1287,7 @@ ksocknal_process_receive (ksock_conn_t *conn) rc = -EIO; } - if (rc == 0 && conn->ksnc_msg.ksm_zc_cookies[0] != 0) { + if (!rc && conn->ksnc_msg.ksm_zc_cookies[0]) { LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x); lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr; @@ -1285,16 +1301,16 @@ ksocknal_process_receive (ksock_conn_t *conn) lnet_finalize(conn->ksnc_peer->ksnp_ni, conn->ksnc_cookie, rc); - if (rc != 0) { + if (rc) { ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings (conn, rc); + ksocknal_close_conn_and_siblings(conn, rc); return -EPROTO; } /* Fall through */ case SOCKNAL_RX_SLOP: /* starting new packet? 
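Both checksum tests above follow one rule: a zero wire checksum means the sender supplied none, so only a non-zero value that disagrees with the receiver's running checksum is an error. The rule in miniature:

#include <stdint.h>
#include <stdio.h>

/* Returns 0 if acceptable, -1 on a genuine mismatch. */
static int csum_check(uint32_t wire_csum, uint32_t computed)
{
        if (wire_csum && wire_csum != computed)
                return -1;      /* has a checksum and it is wrong */
        return 0;               /* matches, or none was sent */
}

int main(void)
{
        printf("%d %d %d\n",
               csum_check(0, 0xdead),           /* none sent: accept */
               csum_check(0xbeef, 0xbeef),      /* match: accept */
               csum_check(0xbeef, 0xdead));     /* mismatch: reject */
        return 0;
}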
*/ - if (ksocknal_new_packet (conn, conn->ksnc_rx_nob_left)) + if (ksocknal_new_packet(conn, conn->ksnc_rx_nob_left)) return 0; /* come back later */ goto again; /* try to finish reading slop now */ @@ -1308,9 +1324,9 @@ ksocknal_process_receive (ksock_conn_t *conn) } int -ksocknal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, - unsigned int niov, struct kvec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) +ksocknal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, + unsigned int niov, struct kvec *iov, lnet_kiov_t *kiov, + unsigned int offset, unsigned int mlen, unsigned int rlen) { ksock_conn_t *conn = private; ksock_sched_t *sched = conn->ksnc_scheduler; @@ -1322,7 +1338,7 @@ ksocknal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, conn->ksnc_rx_nob_wanted = mlen; conn->ksnc_rx_nob_left = rlen; - if (mlen == 0 || iov != NULL) { + if (!mlen || iov) { conn->ksnc_rx_nkiov = 0; conn->ksnc_rx_kiov = NULL; conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov; @@ -1349,8 +1365,8 @@ ksocknal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, switch (conn->ksnc_rx_state) { case SOCKNAL_RX_PARSE_WAIT: list_add_tail(&conn->ksnc_rx_list, &sched->kss_rx_conns); - wake_up (&sched->kss_waitq); - LASSERT (conn->ksnc_rx_ready); + wake_up(&sched->kss_waitq); + LASSERT(conn->ksnc_rx_ready); break; case SOCKNAL_RX_PARSE: @@ -1396,7 +1412,7 @@ int ksocknal_scheduler(void *arg) cfs_block_allsigs(); rc = cfs_cpt_bind(lnet_cpt_table(), info->ksi_cpt); - if (rc != 0) { + if (rc) { CERROR("Can't set CPT affinity to %d: %d\n", info->ksi_cpt, rc); } @@ -1408,18 +1424,20 @@ int ksocknal_scheduler(void *arg) /* Ensure I progress everything semi-fairly */ - if (!list_empty (&sched->kss_rx_conns)) { + if (!list_empty(&sched->kss_rx_conns)) { conn = list_entry(sched->kss_rx_conns.next, - ksock_conn_t, ksnc_rx_list); + ksock_conn_t, ksnc_rx_list); list_del(&conn->ksnc_rx_list); LASSERT(conn->ksnc_rx_scheduled); LASSERT(conn->ksnc_rx_ready); - /* clear rx_ready in case receive isn't complete. + /* + * clear rx_ready in case receive isn't complete. * Do it BEFORE we call process_recv, since * data_ready can set it any time after we release - * kss_lock. */ + * kss_lock. + */ conn->ksnc_rx_ready = 0; spin_unlock_bh(&sched->kss_lock); @@ -1431,18 +1449,20 @@ int ksocknal_scheduler(void *arg) LASSERT(conn->ksnc_rx_scheduled); /* Did process_receive get everything it wanted? 
*/ - if (rc == 0) + if (!rc) conn->ksnc_rx_ready = 1; if (conn->ksnc_rx_state == SOCKNAL_RX_PARSE) { - /* Conn blocked waiting for ksocknal_recv() + /* + * Conn blocked waiting for ksocknal_recv() * I change its state (under lock) to signal - * it can be rescheduled */ + * it can be rescheduled + */ conn->ksnc_rx_state = SOCKNAL_RX_PARSE_WAIT; } else if (conn->ksnc_rx_ready) { /* reschedule for rx */ - list_add_tail (&conn->ksnc_rx_list, - &sched->kss_rx_conns); + list_add_tail(&conn->ksnc_rx_list, + &sched->kss_rx_conns); } else { conn->ksnc_rx_scheduled = 0; /* drop my ref */ @@ -1452,25 +1472,24 @@ int ksocknal_scheduler(void *arg) did_something = 1; } - if (!list_empty (&sched->kss_tx_conns)) { + if (!list_empty(&sched->kss_tx_conns)) { LIST_HEAD(zlist); if (!list_empty(&sched->kss_zombie_noop_txs)) { - list_add(&zlist, - &sched->kss_zombie_noop_txs); + list_add(&zlist, &sched->kss_zombie_noop_txs); list_del_init(&sched->kss_zombie_noop_txs); } conn = list_entry(sched->kss_tx_conns.next, - ksock_conn_t, ksnc_tx_list); - list_del (&conn->ksnc_tx_list); + ksock_conn_t, ksnc_tx_list); + list_del(&conn->ksnc_tx_list); LASSERT(conn->ksnc_tx_scheduled); LASSERT(conn->ksnc_tx_ready); LASSERT(!list_empty(&conn->ksnc_tx_queue)); tx = list_entry(conn->ksnc_tx_queue.next, - ksock_tx_t, tx_list); + ksock_tx_t, tx_list); if (conn->ksnc_tx_carrier == tx) ksocknal_next_tx_carrier(conn); @@ -1478,16 +1497,20 @@ int ksocknal_scheduler(void *arg) /* dequeue now so empty list => more to send */ list_del(&tx->tx_list); - /* Clear tx_ready in case send isn't complete. Do + /* + * Clear tx_ready in case send isn't complete. Do * it BEFORE we call process_transmit, since * write_space can set it any time after we release - * kss_lock. */ + * kss_lock. + */ conn->ksnc_tx_ready = 0; spin_unlock_bh(&sched->kss_lock); if (!list_empty(&zlist)) { - /* free zombie noop txs, it's fast because - * noop txs are just put in freelist */ + /* + * free zombie noop txs, it's fast because + * noop txs are just put in freelist + */ ksocknal_txlist_done(NULL, &zlist, 0); } @@ -1496,8 +1519,7 @@ int ksocknal_scheduler(void *arg) if (rc == -ENOMEM || rc == -EAGAIN) { /* Incomplete send: replace tx on HEAD of tx_queue */ spin_lock_bh(&sched->kss_lock); - list_add(&tx->tx_list, - &conn->ksnc_tx_queue); + list_add(&tx->tx_list, &conn->ksnc_tx_queue); } else { /* Complete send; tx -ref */ ksocknal_tx_decref(tx); @@ -1508,13 +1530,15 @@ int ksocknal_scheduler(void *arg) } if (rc == -ENOMEM) { - /* Do nothing; after a short timeout, this - * conn will be reposted on kss_tx_conns. */ + /* + * Do nothing; after a short timeout, this + * conn will be reposted on kss_tx_conns. + */ } else if (conn->ksnc_tx_ready && !list_empty(&conn->ksnc_tx_queue)) { /* reschedule for tx */ list_add_tail(&conn->ksnc_tx_list, - &sched->kss_tx_conns); + &sched->kss_tx_conns); } else { conn->ksnc_tx_scheduled = 0; /* drop my ref */ @@ -1533,7 +1557,7 @@ int ksocknal_scheduler(void *arg) rc = wait_event_interruptible_exclusive( sched->kss_waitq, !ksocknal_sched_cansleep(sched)); - LASSERT (rc == 0); + LASSERT(!rc); } else { cond_resched(); } @@ -1551,7 +1575,7 @@ int ksocknal_scheduler(void *arg) * Add connection to kss_rx_conns of scheduler * and wakeup the scheduler. 
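
The scheduler loop above and the two callbacks that follow cooperate through a small lost-wakeup protocol: the callback sets ksnc_rx_ready/ksnc_tx_ready under kss_lock and queues the connection at most once, while the scheduler clears the flag before processing, so a callback firing mid-receive re-arms it and the connection is requeued rather than forgotten. A minimal userspace sketch of the same idiom, with hypothetical names that are not part of this patch:

#include <pthread.h>
#include <stdbool.h>

struct conn {
        bool ready;      /* set by the callback, cleared by the worker */
        bool scheduled;  /* already on the work queue? */
};

static pthread_mutex_t sched_lock = PTHREAD_MUTEX_INITIALIZER;

/* callback side: record the event, queue the conn at most once */
static void data_ready(struct conn *c)
{
        pthread_mutex_lock(&sched_lock);
        c->ready = true;
        if (!c->scheduled) {
                c->scheduled = true;
                /* queue_conn(c); wake_worker();  -- hypothetical */
        }
        pthread_mutex_unlock(&sched_lock);
}

/* worker side: clear ready BEFORE the receive so a concurrent
 * callback re-arms it instead of being lost */
static void worker_service(struct conn *c)
{
        pthread_mutex_lock(&sched_lock);
        c->ready = false;
        pthread_mutex_unlock(&sched_lock);

        /* process_receive(c);  -- may race with data_ready() */

        pthread_mutex_lock(&sched_lock);
        if (c->ready) {
                /* requeue_conn(c);  -- more work arrived meanwhile */
        } else {
                c->scheduled = false;   /* drop the scheduler's claim */
        }
        pthread_mutex_unlock(&sched_lock);
}
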
*/ -void ksocknal_read_callback (ksock_conn_t *conn) +void ksocknal_read_callback(ksock_conn_t *conn) { ksock_sched_t *sched; @@ -1562,13 +1586,12 @@ void ksocknal_read_callback (ksock_conn_t *conn) conn->ksnc_rx_ready = 1; if (!conn->ksnc_rx_scheduled) { /* not being progressed */ - list_add_tail(&conn->ksnc_rx_list, - &sched->kss_rx_conns); + list_add_tail(&conn->ksnc_rx_list, &sched->kss_rx_conns); conn->ksnc_rx_scheduled = 1; /* extra ref for scheduler */ ksocknal_conn_addref(conn); - wake_up (&sched->kss_waitq); + wake_up(&sched->kss_waitq); } spin_unlock_bh(&sched->kss_lock); } @@ -1577,7 +1600,7 @@ void ksocknal_read_callback (ksock_conn_t *conn) * Add connection to kss_tx_conns of scheduler * and wakeup the scheduler. */ -void ksocknal_write_callback (ksock_conn_t *conn) +void ksocknal_write_callback(ksock_conn_t *conn) { ksock_sched_t *sched; @@ -1589,20 +1612,19 @@ void ksocknal_write_callback (ksock_conn_t *conn) if (!conn->ksnc_tx_scheduled && /* not being progressed */ !list_empty(&conn->ksnc_tx_queue)) { /* packets to send */ - list_add_tail (&conn->ksnc_tx_list, - &sched->kss_tx_conns); + list_add_tail(&conn->ksnc_tx_list, &sched->kss_tx_conns); conn->ksnc_tx_scheduled = 1; /* extra ref for scheduler */ ksocknal_conn_addref(conn); - wake_up (&sched->kss_waitq); + wake_up(&sched->kss_waitq); } spin_unlock_bh(&sched->kss_lock); } static ksock_proto_t * -ksocknal_parse_proto_version (ksock_hello_msg_t *hello) +ksocknal_parse_proto_version(ksock_hello_msg_t *hello) { __u32 version = 0; @@ -1611,7 +1633,7 @@ ksocknal_parse_proto_version (ksock_hello_msg_t *hello) else if (hello->kshm_magic == __swab32(LNET_PROTO_MAGIC)) version = __swab32(hello->kshm_version); - if (version != 0) { + if (version) { #if SOCKNAL_VERSION_DEBUG if (*ksocknal_tunables.ksnd_protocol == 1) return NULL; @@ -1632,11 +1654,11 @@ ksocknal_parse_proto_version (ksock_hello_msg_t *hello) if (hello->kshm_magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC)) { lnet_magicversion_t *hmv = (lnet_magicversion_t *)hello; - CLASSERT(sizeof (lnet_magicversion_t) == - offsetof (ksock_hello_msg_t, kshm_src_nid)); + CLASSERT(sizeof(lnet_magicversion_t) == + offsetof(ksock_hello_msg_t, kshm_src_nid)); - if (hmv->version_major == cpu_to_le16 (KSOCK_PROTO_V1_MAJOR) && - hmv->version_minor == cpu_to_le16 (KSOCK_PROTO_V1_MINOR)) + if (hmv->version_major == cpu_to_le16(KSOCK_PROTO_V1_MAJOR) && + hmv->version_minor == cpu_to_le16(KSOCK_PROTO_V1_MINOR)) return &ksocknal_protocol_v1x; } @@ -1644,8 +1666,8 @@ ksocknal_parse_proto_version (ksock_hello_msg_t *hello) } int -ksocknal_send_hello (lnet_ni_t *ni, ksock_conn_t *conn, - lnet_nid_t peer_nid, ksock_hello_msg_t *hello) +ksocknal_send_hello(lnet_ni_t *ni, ksock_conn_t *conn, + lnet_nid_t peer_nid, ksock_hello_msg_t *hello) { /* CAVEAT EMPTOR: this byte flips 'ipaddrs' */ ksock_net_t *net = (ksock_net_t *)ni->ni_data; @@ -1653,7 +1675,7 @@ ksocknal_send_hello (lnet_ni_t *ni, ksock_conn_t *conn, LASSERT(hello->kshm_nips <= LNET_MAX_INTERFACES); /* rely on caller to hold a ref on socket so it wouldn't disappear */ - LASSERT(conn->ksnc_proto != NULL); + LASSERT(conn->ksnc_proto); hello->kshm_src_nid = ni->ni_nid; hello->kshm_dst_nid = peer_nid; @@ -1682,9 +1704,9 @@ ksocknal_invert_type(int type) } int -ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, - ksock_hello_msg_t *hello, lnet_process_id_t *peerid, - __u64 *incarnation) +ksocknal_recv_hello(lnet_ni_t *ni, ksock_conn_t *conn, + ksock_hello_msg_t *hello, lnet_process_id_t *peerid, + __u64 *incarnation) { /* Return < 0 fatal error 
* 0 success @@ -1692,7 +1714,7 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, * EPROTO protocol version mismatch */ struct socket *sock = conn->ksnc_sock; - int active = (conn->ksnc_proto != NULL); + int active = !!conn->ksnc_proto; int timeout; int proto_match; int rc; @@ -1705,20 +1727,20 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, timeout = active ? *ksocknal_tunables.ksnd_timeout : lnet_acceptor_timeout(); - rc = lnet_sock_read(sock, &hello->kshm_magic, sizeof (hello->kshm_magic), timeout); - if (rc != 0) { + rc = lnet_sock_read(sock, &hello->kshm_magic, sizeof(hello->kshm_magic), timeout); + if (rc) { CERROR("Error %d reading HELLO from %pI4h\n", - rc, &conn->ksnc_ipaddr); - LASSERT (rc < 0); + rc, &conn->ksnc_ipaddr); + LASSERT(rc < 0); return rc; } if (hello->kshm_magic != LNET_PROTO_MAGIC && hello->kshm_magic != __swab32(LNET_PROTO_MAGIC) && - hello->kshm_magic != le32_to_cpu (LNET_PROTO_TCP_MAGIC)) { + hello->kshm_magic != le32_to_cpu(LNET_PROTO_TCP_MAGIC)) { /* Unexpected magic! */ CERROR("Bad magic(1) %#08x (%#08x expected) from %pI4h\n", - __cpu_to_le32 (hello->kshm_magic), + __cpu_to_le32(hello->kshm_magic), LNET_PROTO_TCP_MAGIC, &conn->ksnc_ipaddr); return -EPROTO; @@ -1726,15 +1748,15 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, rc = lnet_sock_read(sock, &hello->kshm_version, sizeof(hello->kshm_version), timeout); - if (rc != 0) { + if (rc) { CERROR("Error %d reading HELLO from %pI4h\n", - rc, &conn->ksnc_ipaddr); + rc, &conn->ksnc_ipaddr); LASSERT(rc < 0); return rc; } proto = ksocknal_parse_proto_version(hello); - if (proto == NULL) { + if (!proto) { if (!active) { /* unknown protocol from peer, tell peer my protocol */ conn->ksnc_proto = &ksocknal_protocol_v3x; @@ -1760,7 +1782,7 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, /* receive the rest of hello message anyway */ rc = conn->ksnc_proto->pro_recv_hello(conn, hello, timeout); - if (rc != 0) { + if (rc) { CERROR("Error %d reading or checking hello from from %pI4h\n", rc, &conn->ksnc_ipaddr); LASSERT(rc < 0); @@ -1792,8 +1814,8 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, conn->ksnc_type = ksocknal_invert_type(hello->kshm_ctype); if (conn->ksnc_type == SOCKLND_CONN_NONE) { CERROR("Unexpected type %d from %s ip %pI4h\n", - hello->kshm_ctype, libcfs_id2str(*peerid), - &conn->ksnc_ipaddr); + hello->kshm_ctype, libcfs_id2str(*peerid), + &conn->ksnc_ipaddr); return -EPROTO; } @@ -1816,9 +1838,8 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, if (ksocknal_invert_type(hello->kshm_ctype) != conn->ksnc_type) { CERROR("Mismatched types: me %d, %s ip %pI4h %d\n", - conn->ksnc_type, libcfs_id2str(*peerid), - &conn->ksnc_ipaddr, - hello->kshm_ctype); + conn->ksnc_type, libcfs_id2str(*peerid), + &conn->ksnc_ipaddr, hello->kshm_ctype); return -EPROTO; } @@ -1826,7 +1847,7 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, } static int -ksocknal_connect (ksock_route_t *route) +ksocknal_connect(ksock_route_t *route) { LIST_HEAD(zombies); ksock_peer_t *peer = route->ksnr_peer; @@ -1850,10 +1871,12 @@ ksocknal_connect (ksock_route_t *route) for (;;) { wanted = ksocknal_route_mask() & ~route->ksnr_connected; - /* stop connecting if peer/route got closed under me, or - * route got connected while queued */ + /* + * stop connecting if peer/route got closed under me, or + * route got connected while queued + */ if (peer->ksnp_closing || route->ksnr_deleted || - wanted == 0) { + !wanted) { retry_later = 0; break; } @@ -1869,14 +1892,14 @@ ksocknal_connect 
(ksock_route_t *route) if (retry_later) /* needs reschedule */ break; - if ((wanted & (1 << SOCKLND_CONN_ANY)) != 0) { + if (wanted & (1 << SOCKLND_CONN_ANY)) { type = SOCKLND_CONN_ANY; - } else if ((wanted & (1 << SOCKLND_CONN_CONTROL)) != 0) { + } else if (wanted & (1 << SOCKLND_CONN_CONTROL)) { type = SOCKLND_CONN_CONTROL; - } else if ((wanted & (1 << SOCKLND_CONN_BULK_IN)) != 0) { + } else if (wanted & (1 << SOCKLND_CONN_BULK_IN)) { type = SOCKLND_CONN_BULK_IN; } else { - LASSERT ((wanted & (1 << SOCKLND_CONN_BULK_OUT)) != 0); + LASSERT(wanted & (1 << SOCKLND_CONN_BULK_OUT)); type = SOCKLND_CONN_BULK_OUT; } @@ -1893,7 +1916,7 @@ ksocknal_connect (ksock_route_t *route) rc = lnet_connect(&sock, peer->ksnp_id.nid, route->ksnr_myipaddr, route->ksnr_ipaddr, route->ksnr_port); - if (rc != 0) + if (rc) goto failed; rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type); @@ -1904,9 +1927,11 @@ ksocknal_connect (ksock_route_t *route) goto failed; } - /* A +ve RC means I have to retry because I lost the connection - * race or I have to renegotiate protocol version */ - retry_later = (rc != 0); + /* + * A +ve RC means I have to retry because I lost the connection + * race or I have to renegotiate protocol version + */ + retry_later = (rc); if (retry_later) CDEBUG(D_NET, "peer %s: conn race, retry later.\n", libcfs_nid2str(peer->ksnp_id.nid)); @@ -1918,17 +1943,20 @@ ksocknal_connect (ksock_route_t *route) route->ksnr_connecting = 0; if (retry_later) { - /* re-queue for attention; this frees me up to handle - * the peer's incoming connection request */ - + /* + * re-queue for attention; this frees me up to handle + * the peer's incoming connection request + */ if (rc == EALREADY || - (rc == 0 && peer->ksnp_accepting > 0)) { - /* We want to introduce a delay before next + (!rc && peer->ksnp_accepting > 0)) { + /* + * We want to introduce a delay before next * attempt to connect if we lost conn race, * but the race is resolved quickly usually, - * so min_reconnectms should be good heuristic */ + * so min_reconnectms should be good heuristic + */ route->ksnr_retry_interval = - cfs_time_seconds(*ksocknal_tunables.ksnd_min_reconnectms)/1000; + cfs_time_seconds(*ksocknal_tunables.ksnd_min_reconnectms) / 1000; route->ksnr_timeout = cfs_time_add(cfs_time_current(), route->ksnr_retry_interval); } @@ -1949,30 +1977,34 @@ ksocknal_connect (ksock_route_t *route) route->ksnr_retry_interval *= 2; route->ksnr_retry_interval = max(route->ksnr_retry_interval, - cfs_time_seconds(*ksocknal_tunables.ksnd_min_reconnectms)/1000); + cfs_time_seconds(*ksocknal_tunables.ksnd_min_reconnectms) / 1000); route->ksnr_retry_interval = min(route->ksnr_retry_interval, - cfs_time_seconds(*ksocknal_tunables.ksnd_max_reconnectms)/1000); + cfs_time_seconds(*ksocknal_tunables.ksnd_max_reconnectms) / 1000); - LASSERT (route->ksnr_retry_interval != 0); + LASSERT(route->ksnr_retry_interval); route->ksnr_timeout = cfs_time_add(cfs_time_current(), route->ksnr_retry_interval); if (!list_empty(&peer->ksnp_tx_queue) && - peer->ksnp_accepting == 0 && - ksocknal_find_connecting_route_locked(peer) == NULL) { + !peer->ksnp_accepting && + !ksocknal_find_connecting_route_locked(peer)) { ksock_conn_t *conn; - /* ksnp_tx_queue is queued on a conn on successful - * connection for V1.x and V2.x */ - if (!list_empty (&peer->ksnp_conns)) { + /* + * ksnp_tx_queue is queued on a conn on successful + * connection for V1.x and V2.x + */ + if (!list_empty(&peer->ksnp_conns)) { conn = list_entry(peer->ksnp_conns.next, - ksock_conn_t, ksnc_list); - LASSERT 
(conn->ksnc_proto == &ksocknal_protocol_v3x); + ksock_conn_t, ksnc_list); + LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x); } - /* take all the blocked packets while I've got the lock and - * complete below... */ + /* + * take all the blocked packets while I've got the lock and + * complete below... + */ list_splice_init(&peer->ksnp_tx_queue, &zombies); } @@ -2011,8 +2043,10 @@ ksocknal_connd_check_start(time64_t sec, long *timeout) if (total >= *ksocknal_tunables.ksnd_nconnds_max || total > ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV) { - /* can't create more connd, or still have enough - * threads to handle more connecting */ + /* + * can't create more connd, or still have enough + * threads to handle more connecting + */ return 0; } @@ -2041,7 +2075,7 @@ ksocknal_connd_check_start(time64_t sec, long *timeout) rc = ksocknal_thread_start(ksocknal_connd, NULL, name); spin_lock_bh(&ksocknal_data.ksnd_connd_lock); - if (rc == 0) + if (!rc) return 1; /* we tried ... */ @@ -2093,8 +2127,10 @@ ksocknal_connd_check_stop(time64_t sec, long *timeout) ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV; } -/* Go through connd_routes queue looking for a route that we can process - * right now, @timeout_p can be updated if we need to come back later */ +/* + * Go through connd_routes queue looking for a route that we can process + * right now, @timeout_p can be updated if we need to come back later + */ static ksock_route_t * ksocknal_connd_get_route_locked(signed long *timeout_p) { @@ -2104,10 +2140,9 @@ ksocknal_connd_get_route_locked(signed long *timeout_p) now = cfs_time_current(); /* connd_routes can contain both pending and ordinary routes */ - list_for_each_entry (route, &ksocknal_data.ksnd_connd_routes, - ksnr_connd_list) { - - if (route->ksnr_retry_interval == 0 || + list_for_each_entry(route, &ksocknal_data.ksnd_connd_routes, + ksnr_connd_list) { + if (!route->ksnr_retry_interval || cfs_time_aftereq(now, route->ksnr_timeout)) return route; @@ -2120,7 +2155,7 @@ ksocknal_connd_get_route_locked(signed long *timeout_p) } int -ksocknal_connd (void *arg) +ksocknal_connd(void *arg) { spinlock_t *connd_lock = &ksocknal_data.ksnd_connd_lock; ksock_connreq_t *cr; @@ -2172,15 +2207,17 @@ ksocknal_connd (void *arg) spin_lock_bh(connd_lock); } - /* Only handle an outgoing connection request if there + /* + * Only handle an outgoing connection request if there * is a thread left to handle incoming connections and - * create new connd */ + * create new connd + */ if (ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV < ksocknal_data.ksnd_connd_running) { route = ksocknal_connd_get_route_locked(&timeout); } - if (route != NULL) { - list_del (&route->ksnr_connd_list); + if (route) { + list_del(&route->ksnr_connd_list); ksocknal_data.ksnd_connd_connecting++; spin_unlock_bh(connd_lock); dropped_lock = 1; @@ -2231,24 +2268,26 @@ ksocknal_connd (void *arg) } static ksock_conn_t * -ksocknal_find_timed_out_conn (ksock_peer_t *peer) +ksocknal_find_timed_out_conn(ksock_peer_t *peer) { /* We're called with a shared lock on ksnd_global_lock */ ksock_conn_t *conn; struct list_head *ctmp; - list_for_each (ctmp, &peer->ksnp_conns) { + list_for_each(ctmp, &peer->ksnp_conns) { int error; - conn = list_entry (ctmp, ksock_conn_t, ksnc_list); + conn = list_entry(ctmp, ksock_conn_t, ksnc_list); /* Don't need the {get,put}connsock dance to deref ksnc_sock */ LASSERT(!conn->ksnc_closing); - /* SOCK_ERROR will reset error code of socket in - * some platform (like Darwin8.x) */ + /* + * SOCK_ERROR 
will reset error code of socket in + * some platform (like Darwin8.x) + */ error = conn->ksnc_sock->sk->sk_err; - if (error != 0) { + if (error) { ksocknal_conn_addref(conn); switch (error) { @@ -2292,11 +2331,13 @@ ksocknal_find_timed_out_conn (ksock_peer_t *peer) } if ((!list_empty(&conn->ksnc_tx_queue) || - conn->ksnc_sock->sk->sk_wmem_queued != 0) && + conn->ksnc_sock->sk->sk_wmem_queued) && cfs_time_aftereq(cfs_time_current(), conn->ksnc_tx_deadline)) { - /* Timed out messages queued for sending or - * buffered in the socket's send buffer */ + /* + * Timed out messages queued for sending or + * buffered in the socket's send buffer + */ ksocknal_conn_addref(conn); CNETERR("Timeout sending data to %s (%pI4h:%d) the network or that node may be down.\n", libcfs_id2str(peer->ksnp_id), @@ -2313,20 +2354,18 @@ static inline void ksocknal_flush_stale_txs(ksock_peer_t *peer) { ksock_tx_t *tx; + ksock_tx_t *tmp; LIST_HEAD(stale_txs); write_lock_bh(&ksocknal_data.ksnd_global_lock); - while (!list_empty (&peer->ksnp_tx_queue)) { - tx = list_entry (peer->ksnp_tx_queue.next, - ksock_tx_t, tx_list); - + list_for_each_entry_safe(tx, tmp, &peer->ksnp_tx_queue, tx_list) { if (!cfs_time_aftereq(cfs_time_current(), tx->tx_deadline)) break; - list_del (&tx->tx_list); - list_add_tail (&tx->tx_list, &stale_txs); + list_del(&tx->tx_list); + list_add_tail(&tx->tx_list, &stale_txs); } write_unlock_bh(&ksocknal_data.ksnd_global_lock); @@ -2336,6 +2375,7 @@ ksocknal_flush_stale_txs(ksock_peer_t *peer) static int ksocknal_send_keepalive_locked(ksock_peer_t *peer) + __must_hold(&ksocknal_data.ksnd_global_lock) { ksock_sched_t *sched; ksock_conn_t *conn; @@ -2356,12 +2396,14 @@ ksocknal_send_keepalive_locked(ksock_peer_t *peer) if (time_before(cfs_time_current(), peer->ksnp_send_keepalive)) return 0; - /* retry 10 secs later, so we wouldn't put pressure - * on this peer if we failed to send keepalive this time */ + /* + * retry 10 secs later, so we wouldn't put pressure + * on this peer if we failed to send keepalive this time + */ peer->ksnp_send_keepalive = cfs_time_shift(10); conn = ksocknal_find_conn_locked(peer, NULL, 1); - if (conn != NULL) { + if (conn) { sched = conn->ksnc_scheduler; spin_lock_bh(&sched->kss_lock); @@ -2378,12 +2420,12 @@ ksocknal_send_keepalive_locked(ksock_peer_t *peer) /* cookie = 1 is reserved for keepalive PING */ tx = ksocknal_alloc_tx_noop(1, 1); - if (tx == NULL) { + if (!tx) { read_lock(&ksocknal_data.ksnd_global_lock); return -ENOMEM; } - if (ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id) == 0) { + if (!ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id)) { read_lock(&ksocknal_data.ksnd_global_lock); return 1; } @@ -2395,7 +2437,7 @@ ksocknal_send_keepalive_locked(ksock_peer_t *peer) } static void -ksocknal_check_peer_timeouts (int idx) +ksocknal_check_peer_timeouts(int idx) { struct list_head *peers = &ksocknal_data.ksnd_peers[idx]; ksock_peer_t *peer; @@ -2403,9 +2445,11 @@ ksocknal_check_peer_timeouts (int idx) ksock_tx_t *tx; again: - /* NB. We expect to have a look at all the peers and not find any + /* + * NB. We expect to have a look at all the peers and not find any * connections to time out, so we just use a shared lock while we - * take a look... */ + * take a look... 
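
One annotation added in this area deserves a note: ksocknal_send_keepalive_locked() gains __must_hold(&ksocknal_data.ksnd_global_lock). It generates no code; it only tells sparse (make C=1) that the function must be entered and exited with that lock held. Roughly how the kernel's compiler.h defines it, with a placeholder declaration to show usage:

#ifdef __CHECKER__
# define __must_hold(x) __attribute__((context(x, 1, 1)))
#else
# define __must_hold(x)
#endif

struct peer;
struct lock;
extern struct lock global_lock;         /* stand-in for ksnd_global_lock */

/* sparse warns if a caller reaches this without the lock, or if the
 * function returns having dropped it */
int send_keepalive_locked(struct peer *peer)
        __must_hold(&global_lock);
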
+ */ read_lock(&ksocknal_data.ksnd_global_lock); list_for_each_entry(peer, peers, ksnp_list) { @@ -2413,35 +2457,37 @@ ksocknal_check_peer_timeouts (int idx) int resid = 0; int n = 0; - if (ksocknal_send_keepalive_locked(peer) != 0) { + if (ksocknal_send_keepalive_locked(peer)) { read_unlock(&ksocknal_data.ksnd_global_lock); goto again; } - conn = ksocknal_find_timed_out_conn (peer); + conn = ksocknal_find_timed_out_conn(peer); - if (conn != NULL) { + if (conn) { read_unlock(&ksocknal_data.ksnd_global_lock); - ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT); + ksocknal_close_conn_and_siblings(conn, -ETIMEDOUT); - /* NB we won't find this one again, but we can't + /* + * NB we won't find this one again, but we can't * just proceed with the next peer, since we dropped - * ksnd_global_lock and it might be dead already! */ + * ksnd_global_lock and it might be dead already! + */ ksocknal_conn_decref(conn); goto again; } - /* we can't process stale txs right here because we're - * holding only shared lock */ - if (!list_empty (&peer->ksnp_tx_queue)) { - ksock_tx_t *tx = - list_entry (peer->ksnp_tx_queue.next, - ksock_tx_t, tx_list); + /* + * we can't process stale txs right here because we're + * holding only shared lock + */ + if (!list_empty(&peer->ksnp_tx_queue)) { + ksock_tx_t *tx = list_entry(peer->ksnp_tx_queue.next, + ksock_tx_t, tx_list); if (cfs_time_aftereq(cfs_time_current(), tx->tx_deadline)) { - ksocknal_peer_addref(peer); read_unlock(&ksocknal_data.ksnd_global_lock); @@ -2466,13 +2512,13 @@ ksocknal_check_peer_timeouts (int idx) n++; } - if (n == 0) { + if (!n) { spin_unlock(&peer->ksnp_lock); continue; } tx = list_entry(peer->ksnp_zc_req_list.next, - ksock_tx_t, tx_zc_list); + ksock_tx_t, tx_zc_list); deadline = tx->tx_deadline; resid = tx->tx_resid; conn = tx->tx_conn; @@ -2486,7 +2532,7 @@ ksocknal_check_peer_timeouts (int idx) cfs_duration_sec(cfs_time_current() - deadline), resid, conn->ksnc_sock->sk->sk_wmem_queued); - ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT); + ksocknal_close_conn_and_siblings(conn, -ETIMEDOUT); ksocknal_conn_decref(conn); goto again; } @@ -2495,7 +2541,7 @@ ksocknal_check_peer_timeouts (int idx) } int -ksocknal_reaper (void *arg) +ksocknal_reaper(void *arg) { wait_queue_t wait; ksock_conn_t *conn; @@ -2515,12 +2561,10 @@ ksocknal_reaper (void *arg) spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); while (!ksocknal_data.ksnd_shuttingdown) { - - if (!list_empty (&ksocknal_data.ksnd_deathrow_conns)) { - conn = list_entry (ksocknal_data. 
\ - ksnd_deathrow_conns.next, - ksock_conn_t, ksnc_list); - list_del (&conn->ksnc_list); + if (!list_empty(&ksocknal_data.ksnd_deathrow_conns)) { + conn = list_entry(ksocknal_data.ksnd_deathrow_conns.next, + ksock_conn_t, ksnc_list); + list_del(&conn->ksnc_list); spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock); @@ -2531,10 +2575,10 @@ ksocknal_reaper (void *arg) continue; } - if (!list_empty (&ksocknal_data.ksnd_zombie_conns)) { - conn = list_entry (ksocknal_data.ksnd_zombie_conns.\ - next, ksock_conn_t, ksnc_list); - list_del (&conn->ksnc_list); + if (!list_empty(&ksocknal_data.ksnd_zombie_conns)) { + conn = list_entry(ksocknal_data.ksnd_zombie_conns.next, + ksock_conn_t, ksnc_list); + list_del(&conn->ksnc_list); spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock); @@ -2544,9 +2588,9 @@ ksocknal_reaper (void *arg) continue; } - if (!list_empty (&ksocknal_data.ksnd_enomem_conns)) { + if (!list_empty(&ksocknal_data.ksnd_enomem_conns)) { list_add(&enomem_conns, - &ksocknal_data.ksnd_enomem_conns); + &ksocknal_data.ksnd_enomem_conns); list_del_init(&ksocknal_data.ksnd_enomem_conns); } @@ -2554,10 +2598,10 @@ ksocknal_reaper (void *arg) /* reschedule all the connections that stalled with ENOMEM... */ nenomem_conns = 0; - while (!list_empty (&enomem_conns)) { - conn = list_entry (enomem_conns.next, - ksock_conn_t, ksnc_tx_list); - list_del (&conn->ksnc_tx_list); + while (!list_empty(&enomem_conns)) { + conn = list_entry(enomem_conns.next, ksock_conn_t, + ksnc_tx_list); + list_del(&conn->ksnc_tx_list); sched = conn->ksnc_scheduler; @@ -2566,7 +2610,7 @@ ksocknal_reaper (void *arg) LASSERT(conn->ksnc_tx_scheduled); conn->ksnc_tx_ready = 1; list_add_tail(&conn->ksnc_tx_list, - &sched->kss_tx_conns); + &sched->kss_tx_conns); wake_up(&sched->kss_waitq); spin_unlock_bh(&sched->kss_lock); @@ -2580,21 +2624,22 @@ ksocknal_reaper (void *arg) const int p = 1; int chunk = ksocknal_data.ksnd_peer_hash_size; - /* Time to check for timeouts on a few more peers: I do + /* + * Time to check for timeouts on a few more peers: I do * checks every 'p' seconds on a proportion of the peer * table and I need to check every connection 'n' times * within a timeout interval, to ensure I detect a * timeout on any connection within (n+1)/n times the - * timeout interval. */ - + * timeout interval. + */ if (*ksocknal_tunables.ksnd_timeout > n * p) chunk = (chunk * n * p) / *ksocknal_tunables.ksnd_timeout; - if (chunk == 0) + if (!chunk) chunk = 1; for (i = 0; i < chunk; i++) { - ksocknal_check_peer_timeouts (peer_index); + ksocknal_check_peer_timeouts(peer_index); peer_index = (peer_index + 1) % ksocknal_data.ksnd_peer_hash_size; } @@ -2602,25 +2647,27 @@ ksocknal_reaper (void *arg) deadline = cfs_time_add(deadline, cfs_time_seconds(p)); } - if (nenomem_conns != 0) { - /* Reduce my timeout if I rescheduled ENOMEM conns. + if (nenomem_conns) { + /* + * Reduce my timeout if I rescheduled ENOMEM conns. * This also prevents me getting woken immediately - * if any go back on my enomem list. */ + * if any go back on my enomem list. 
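
The proportional scan a few lines above is worth unpacking: with a reaper pass every p seconds and each connection needing n checks per timeout interval, the per-pass slice of the peer hash is scaled so the whole table is covered n times per interval. With illustrative numbers — a 256-bucket hash, n = 3, p = 1 and a 50 s timeout — chunk becomes 256 * 3 * 1 / 50 = 15 buckets per pass, covering the table about every 17 s, i.e. three times per timeout. The same arithmetic as a standalone check:

#include <stdio.h>

int main(void)
{
        int hash_size = 256;    /* ksnd_peer_hash_size, example value */
        int n = 3, p = 1;       /* checks per timeout, seconds per pass */
        int timeout = 50;       /* ksnd_timeout, example value */
        int chunk = hash_size;

        if (timeout > n * p)
                chunk = (chunk * n * p) / timeout;
        if (!chunk)
                chunk = 1;

        printf("scan %d of %d buckets per pass\n", chunk, hash_size);
        return 0;
}
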
+ */ timeout = SOCKNAL_ENOMEM_RETRY; } ksocknal_data.ksnd_reaper_waketime = cfs_time_add(cfs_time_current(), timeout); - set_current_state (TASK_INTERRUPTIBLE); - add_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait); + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&ksocknal_data.ksnd_reaper_waitq, &wait); if (!ksocknal_data.ksnd_shuttingdown && - list_empty (&ksocknal_data.ksnd_deathrow_conns) && - list_empty (&ksocknal_data.ksnd_zombie_conns)) + list_empty(&ksocknal_data.ksnd_deathrow_conns) && + list_empty(&ksocknal_data.ksnd_zombie_conns)) schedule_timeout(timeout); - set_current_state (TASK_RUNNING); - remove_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait); + set_current_state(TASK_RUNNING); + remove_wait_queue(&ksocknal_data.ksnd_reaper_waitq, &wait); spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); } diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c index cf8e43bd3..d4ce06d0a 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c @@ -45,13 +45,13 @@ ksocknal_lib_get_conn_addrs(ksock_conn_t *conn) /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */ LASSERT(!conn->ksnc_closing); - if (rc != 0) { + if (rc) { CERROR("Error %d getting sock peer IP\n", rc); return rc; } rc = lnet_sock_getaddr(conn->ksnc_sock, 0, &conn->ksnc_myipaddr, NULL); - if (rc != 0) { + if (rc) { CERROR("Error %d getting sock local IP\n", rc); return rc; } @@ -67,9 +67,11 @@ ksocknal_lib_zc_capable(ksock_conn_t *conn) if (conn->ksnc_proto == &ksocknal_protocol_v1x) return 0; - /* ZC if the socket supports scatter/gather and doesn't need software - * checksums */ - return ((caps & NETIF_F_SG) != 0 && (caps & NETIF_F_CSUM_MASK) != 0); + /* + * ZC if the socket supports scatter/gather and doesn't need software + * checksums + */ + return ((caps & NETIF_F_SG) && (caps & NETIF_F_CSUM_MASK)); } int @@ -82,12 +84,13 @@ ksocknal_lib_send_iov(ksock_conn_t *conn, ksock_tx_t *tx) if (*ksocknal_tunables.ksnd_enable_csum && /* checksum enabled */ conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection */ tx->tx_nob == tx->tx_resid && /* frist sending */ - tx->tx_msg.ksm_csum == 0) /* not checksummed */ + !tx->tx_msg.ksm_csum) /* not checksummed */ ksocknal_lib_csum_tx(tx); - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. */ - + /* + * NB we can't trust socket ops to either consume our iovs + * or leave them alone. + */ { #if SOCKNAL_SINGLE_FRAG_TX struct kvec scratch; @@ -123,11 +126,13 @@ ksocknal_lib_send_kiov(ksock_conn_t *conn, ksock_tx_t *tx) int nob; /* Not NOOP message */ - LASSERT(tx->tx_lnetmsg != NULL); + LASSERT(tx->tx_lnetmsg); - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. */ - if (tx->tx_msg.ksm_zc_cookies[0] != 0) { + /* + * NB we can't trust socket ops to either consume our iovs + * or leave them alone. 
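
In the zero-copy branch that follows, the payload page goes straight to the transport's sendpage method (falling back to an ordinary copying send when the protocol lacks one), and MSG_MORE is set whenever further fragments are queued so TCP may coalesce frames. A hedged sketch of the same idea via the kernel_sendpage() wrapper — not the call this patch uses, but the conventional interface:

#include <linux/net.h>
#include <linux/socket.h>
#include <linux/types.h>

/* send one page-based fragment without copying it into socket
 * buffers; 'more' hints that further fragments follow at once */
static int send_frag_zc(struct socket *sock, struct page *page,
                        int offset, int len, bool more)
{
        int flags = MSG_DONTWAIT | (more ? MSG_MORE : 0);

        /* kernel_sendpage() falls back to a copying path when the
         * protocol provides no sendpage op */
        return kernel_sendpage(sock, page, offset, len, flags);
}
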
+ */ + if (tx->tx_msg.ksm_zc_cookies[0]) { /* Zero copy is enabled */ struct sock *sk = sock->sk; struct page *page = kiov->kiov_page; @@ -136,13 +141,13 @@ ksocknal_lib_send_kiov(ksock_conn_t *conn, ksock_tx_t *tx) int msgflg = MSG_DONTWAIT; CDEBUG(D_NET, "page %p + offset %x for %d\n", - page, offset, kiov->kiov_len); + page, offset, kiov->kiov_len); if (!list_empty(&conn->ksnc_tx_queue) || fragsize < tx->tx_resid) msgflg |= MSG_MORE; - if (sk->sk_prot->sendpage != NULL) { + if (sk->sk_prot->sendpage) { rc = sk->sk_prot->sendpage(sk, page, offset, fragsize, msgflg); } else { @@ -187,13 +192,14 @@ ksocknal_lib_eager_ack(ksock_conn_t *conn) int opt = 1; struct socket *sock = conn->ksnc_sock; - /* Remind the socket to ACK eagerly. If I don't, the socket might + /* + * Remind the socket to ACK eagerly. If I don't, the socket might * think I'm about to send something it could piggy-back the ACK * on, introducing delay in completing zero-copy sends in my - * peer. */ - - kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK, - (char *)&opt, sizeof(opt)); + * peer. + */ + kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK, (char *)&opt, + sizeof(opt)); } int @@ -218,8 +224,10 @@ ksocknal_lib_recv_iov(ksock_conn_t *conn) int sum; __u32 saved_csum; - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. */ + /* + * NB we can't trust socket ops to either consume our iovs + * or leave them alone. + */ LASSERT(niov > 0); for (nob = i = 0; i < niov; i++) { @@ -228,8 +236,8 @@ ksocknal_lib_recv_iov(ksock_conn_t *conn) } LASSERT(nob <= conn->ksnc_rx_nob_wanted); - rc = kernel_recvmsg(conn->ksnc_sock, &msg, - scratchiov, niov, nob, MSG_DONTWAIT); + rc = kernel_recvmsg(conn->ksnc_sock, &msg, scratchiov, niov, nob, + MSG_DONTWAIT); saved_csum = 0; if (conn->ksnc_proto == &ksocknal_protocol_v2x) { @@ -237,7 +245,7 @@ ksocknal_lib_recv_iov(ksock_conn_t *conn) conn->ksnc_msg.ksm_csum = 0; } - if (saved_csum != 0) { + if (saved_csum) { /* accumulate checksum */ for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) { LASSERT(i < niov); @@ -258,7 +266,7 @@ ksocknal_lib_recv_iov(ksock_conn_t *conn) static void ksocknal_lib_kiov_vunmap(void *addr) { - if (addr == NULL) + if (!addr) return; vunmap(addr); @@ -272,7 +280,7 @@ ksocknal_lib_kiov_vmap(lnet_kiov_t *kiov, int niov, int nob; int i; - if (!*ksocknal_tunables.ksnd_zc_recv || pages == NULL) + if (!*ksocknal_tunables.ksnd_zc_recv || !pages) return NULL; LASSERT(niov <= LNET_MAX_IOV); @@ -282,8 +290,8 @@ ksocknal_lib_kiov_vmap(lnet_kiov_t *kiov, int niov, return NULL; for (nob = i = 0; i < niov; i++) { - if ((kiov[i].kiov_offset != 0 && i > 0) || - (kiov[i].kiov_offset + kiov[i].kiov_len != PAGE_CACHE_SIZE && i < niov - 1)) + if ((kiov[i].kiov_offset && i > 0) || + (kiov[i].kiov_offset + kiov[i].kiov_len != PAGE_SIZE && i < niov - 1)) return NULL; pages[i] = kiov[i].kiov_page; @@ -291,7 +299,7 @@ ksocknal_lib_kiov_vmap(lnet_kiov_t *kiov, int niov, } addr = vmap(pages, niov, VM_MAP, PAGE_KERNEL); - if (addr == NULL) + if (!addr) return NULL; iov->iov_base = addr + kiov[0].kiov_offset; @@ -329,10 +337,12 @@ ksocknal_lib_recv_kiov(ksock_conn_t *conn) int fragnob; int n; - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. */ + /* + * NB we can't trust socket ops to either consume our iovs + * or leave them alone. 
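
ksocknal_lib_eager_ack() above re-arms TCP_QUICKACK on each receive because quickack mode is transient — TCP drifts back to delayed ACKs by itself — and a delayed ACK here would stall the peer, whose zero-copy sends only complete once the data is acknowledged. The userspace equivalent, for illustration:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

static void eager_ack(int fd)
{
        int one = 1;

        /* one-shot: the stack quietly leaves quickack mode again,
         * so re-issue this whenever a prompt ACK is wanted */
        setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &one, sizeof(one));
}
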
+ */ addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages); - if (addr != NULL) { + if (addr) { nob = scratchiov[0].iov_len; n = 1; @@ -347,17 +357,19 @@ ksocknal_lib_recv_kiov(ksock_conn_t *conn) LASSERT(nob <= conn->ksnc_rx_nob_wanted); - rc = kernel_recvmsg(conn->ksnc_sock, &msg, - (struct kvec *)scratchiov, n, nob, MSG_DONTWAIT); + rc = kernel_recvmsg(conn->ksnc_sock, &msg, (struct kvec *)scratchiov, + n, nob, MSG_DONTWAIT); - if (conn->ksnc_msg.ksm_csum != 0) { + if (conn->ksnc_msg.ksm_csum) { for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) { LASSERT(i < niov); - /* Dang! have to kmap again because I have nowhere to + /* + * Dang! have to kmap again because I have nowhere to * stash the mapped address. But by doing it while the * page is still mapped, the kernel just bumps the map - * count and returns me the address it stashed. */ + * count and returns me the address it stashed. + */ base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset; fragnob = kiov[i].kiov_len; if (fragnob > sum) @@ -370,7 +382,7 @@ ksocknal_lib_recv_kiov(ksock_conn_t *conn) } } - if (addr != NULL) { + if (addr) { ksocknal_lib_kiov_vunmap(addr); } else { for (i = 0; i < niov; i++) @@ -388,7 +400,7 @@ ksocknal_lib_csum_tx(ksock_tx_t *tx) void *base; LASSERT(tx->tx_iov[0].iov_base == &tx->tx_msg); - LASSERT(tx->tx_conn != NULL); + LASSERT(tx->tx_conn); LASSERT(tx->tx_conn->ksnc_proto == &ksocknal_protocol_v2x); tx->tx_msg.ksm_csum = 0; @@ -396,7 +408,7 @@ ksocknal_lib_csum_tx(ksock_tx_t *tx) csum = ksocknal_csum(~0, tx->tx_iov[0].iov_base, tx->tx_iov[0].iov_len); - if (tx->tx_kiov != NULL) { + if (tx->tx_kiov) { for (i = 0; i < tx->tx_nkiov; i++) { base = kmap(tx->tx_kiov[i].kiov_page) + tx->tx_kiov[i].kiov_offset; @@ -427,22 +439,22 @@ ksocknal_lib_get_conn_tunables(ksock_conn_t *conn, int *txmem, int *rxmem, int * int rc; rc = ksocknal_connsock_addref(conn); - if (rc != 0) { + if (rc) { LASSERT(conn->ksnc_closing); *txmem = *rxmem = *nagle = 0; return -ESHUTDOWN; } rc = lnet_sock_getbuf(sock, txmem, rxmem); - if (rc == 0) { + if (!rc) { len = sizeof(*nagle); rc = kernel_getsockopt(sock, SOL_TCP, TCP_NODELAY, - (char *)nagle, &len); + (char *)nagle, &len); } ksocknal_connsock_decref(conn); - if (rc == 0) + if (!rc) *nagle = !*nagle; else *txmem = *rxmem = *nagle = 0; @@ -463,23 +475,24 @@ ksocknal_lib_setup_sock(struct socket *sock) sock->sk->sk_allocation = GFP_NOFS; - /* Ensure this socket aborts active sends immediately when we close - * it. */ - + /* + * Ensure this socket aborts active sends immediately when we close + * it. 
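
The lingering setup that follows does two things: SO_LINGER with l_onoff = 0 keeps close() from blocking while the kernel finishes teardown in the background, and TCP_LINGER2 = -1 then stops the socket from idling in FIN_WAIT2 afterwards. The same pair of calls from userspace, as an illustration:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

static int no_lingering(int fd)
{
        /* l_onoff = 0: close() returns at once; delivery of queued
         * data continues in the background */
        struct linger l = { .l_onoff = 0, .l_linger = 0 };
        int fin_wait = -1;      /* < 0: don't idle in FIN_WAIT2 */

        if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &l, sizeof(l)))
                return -1;
        return setsockopt(fd, IPPROTO_TCP, TCP_LINGER2,
                          &fin_wait, sizeof(fin_wait));
}
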
+ */ linger.l_onoff = 0; linger.l_linger = 0; - rc = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, - (char *)&linger, sizeof(linger)); - if (rc != 0) { + rc = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, (char *)&linger, + sizeof(linger)); + if (rc) { CERROR("Can't set SO_LINGER: %d\n", rc); return rc; } option = -1; - rc = kernel_setsockopt(sock, SOL_TCP, TCP_LINGER2, - (char *)&option, sizeof(option)); - if (rc != 0) { + rc = kernel_setsockopt(sock, SOL_TCP, TCP_LINGER2, (char *)&option, + sizeof(option)); + if (rc) { CERROR("Can't set SO_LINGER2: %d\n", rc); return rc; } @@ -488,8 +501,8 @@ ksocknal_lib_setup_sock(struct socket *sock) option = 1; rc = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, - (char *)&option, sizeof(option)); - if (rc != 0) { + (char *)&option, sizeof(option)); + if (rc) { CERROR("Can't disable nagle: %d\n", rc); return rc; } @@ -497,10 +510,10 @@ ksocknal_lib_setup_sock(struct socket *sock) rc = lnet_sock_setbuf(sock, *ksocknal_tunables.ksnd_tx_buffer_size, *ksocknal_tunables.ksnd_rx_buffer_size); - if (rc != 0) { + if (rc) { CERROR("Can't set buffer tx %d, rx %d buffers: %d\n", - *ksocknal_tunables.ksnd_tx_buffer_size, - *ksocknal_tunables.ksnd_rx_buffer_size, rc); + *ksocknal_tunables.ksnd_tx_buffer_size, + *ksocknal_tunables.ksnd_rx_buffer_size, rc); return rc; } @@ -514,9 +527,9 @@ ksocknal_lib_setup_sock(struct socket *sock) do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0); option = (do_keepalive ? 1 : 0); - rc = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, - (char *)&option, sizeof(option)); - if (rc != 0) { + rc = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&option, + sizeof(option)); + if (rc) { CERROR("Can't set SO_KEEPALIVE: %d\n", rc); return rc; } @@ -524,23 +537,23 @@ ksocknal_lib_setup_sock(struct socket *sock) if (!do_keepalive) return 0; - rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, - (char *)&keep_idle, sizeof(keep_idle)); - if (rc != 0) { + rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, (char *)&keep_idle, + sizeof(keep_idle)); + if (rc) { CERROR("Can't set TCP_KEEPIDLE: %d\n", rc); return rc; } rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL, - (char *)&keep_intvl, sizeof(keep_intvl)); - if (rc != 0) { + (char *)&keep_intvl, sizeof(keep_intvl)); + if (rc) { CERROR("Can't set TCP_KEEPINTVL: %d\n", rc); return rc; } - rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, - (char *)&keep_count, sizeof(keep_count)); - if (rc != 0) { + rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, (char *)&keep_count, + sizeof(keep_count)); + if (rc) { CERROR("Can't set TCP_KEEPCNT: %d\n", rc); return rc; } @@ -558,7 +571,7 @@ ksocknal_lib_push_conn(ksock_conn_t *conn) int rc; rc = ksocknal_connsock_addref(conn); - if (rc != 0) /* being shut down */ + if (rc) /* being shut down */ return; sk = conn->ksnc_sock->sk; @@ -570,8 +583,8 @@ ksocknal_lib_push_conn(ksock_conn_t *conn) release_sock(sk); rc = kernel_setsockopt(conn->ksnc_sock, SOL_TCP, TCP_NODELAY, - (char *)&val, sizeof(val)); - LASSERT(rc == 0); + (char *)&val, sizeof(val)); + LASSERT(!rc); lock_sock(sk); tp->nonagle = nonagle; @@ -593,11 +606,12 @@ ksocknal_data_ready(struct sock *sk) read_lock(&ksocknal_data.ksnd_global_lock); conn = sk->sk_user_data; - if (conn == NULL) { /* raced with ksocknal_terminate_conn */ + if (!conn) { /* raced with ksocknal_terminate_conn */ LASSERT(sk->sk_data_ready != &ksocknal_data_ready); sk->sk_data_ready(sk); - } else + } else { ksocknal_read_callback(conn); + } read_unlock(&ksocknal_data.ksnd_global_lock); } @@ 
-619,14 +633,14 @@ ksocknal_write_space(struct sock *sk) CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n", sk, wspace, min_wpace, conn, - (conn == NULL) ? "" : (conn->ksnc_tx_ready ? + !conn ? "" : (conn->ksnc_tx_ready ? " ready" : " blocked"), - (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ? + !conn ? "" : (conn->ksnc_tx_scheduled ? " scheduled" : " idle"), - (conn == NULL) ? "" : (list_empty(&conn->ksnc_tx_queue) ? + !conn ? "" : (list_empty(&conn->ksnc_tx_queue) ? " empty" : " queued")); - if (conn == NULL) { /* raced with ksocknal_terminate_conn */ + if (!conn) { /* raced with ksocknal_terminate_conn */ LASSERT(sk->sk_write_space != &ksocknal_write_space); sk->sk_write_space(sk); @@ -637,10 +651,11 @@ ksocknal_write_space(struct sock *sk) if (wspace >= min_wpace) { /* got enough space */ ksocknal_write_callback(conn); - /* Clear SOCK_NOSPACE _after_ ksocknal_write_callback so the + /* + * Clear SOCK_NOSPACE _after_ ksocknal_write_callback so the * ENOMEM check in ksocknal_transmit is race-free (think about - * it). */ - + * it). + */ clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); } @@ -666,15 +681,19 @@ ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn) void ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn) { - /* Remove conn's network callbacks. + /* + * Remove conn's network callbacks. * NB I _have_ to restore the callback, rather than storing a noop, - * since the socket could survive past this module being unloaded!! */ + * since the socket could survive past this module being unloaded!! + */ sock->sk->sk_data_ready = conn->ksnc_saved_data_ready; sock->sk->sk_write_space = conn->ksnc_saved_write_space; - /* A callback could be in progress already; they hold a read lock + /* + * A callback could be in progress already; they hold a read lock * on ksnd_global_lock (to serialise with me) and NOOP if - * sk_user_data is NULL. */ + * sk_user_data is NULL. + */ sock->sk->sk_user_data = NULL; return ; @@ -691,14 +710,16 @@ ksocknal_lib_memory_pressure(ksock_conn_t *conn) if (!test_bit(SOCK_NOSPACE, &conn->ksnc_sock->flags) && !conn->ksnc_tx_ready) { - /* SOCK_NOSPACE is set when the socket fills + /* + * SOCK_NOSPACE is set when the socket fills * and cleared in the write_space callback * (which also sets ksnc_tx_ready). If * SOCK_NOSPACE and ksnc_tx_ready are BOTH * zero, I didn't fill the socket and * write_space won't reschedule me, so I * return -ENOMEM to get my caller to retry - * after a timeout */ + * after a timeout + */ rc = -ENOMEM; } diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c index fdb2b23e2..6329cbe66 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c @@ -14,9 +14,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include "socklnd.h" @@ -41,8 +38,10 @@ static int peer_timeout = 180; module_param(peer_timeout, int, 0444); MODULE_PARM_DESC(peer_timeout, "Seconds without aliveness news to declare peer dead (<=0 to disable)"); -/* Number of daemons in each thread pool which is percpt, - * we will estimate reasonable value based on CPUs if it's not set. 
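
Every tunable in socklnd_modparams.c follows the same three-line recipe: a static variable holding the default, module_param() to publish it under /sys/module/ksocklnd/parameters/ (mode 0444 read-only, 0644 root-writable), and MODULE_PARM_DESC() so modinfo can describe it. A generic instance with made-up names:

#include <linux/module.h>
#include <linux/moduleparam.h>

static int my_timeout = 50;             /* default, in seconds */
module_param(my_timeout, int, 0644);    /* runtime-writable by root */
MODULE_PARM_DESC(my_timeout, "request timeout in seconds");

A value set this way can be overridden at load time, e.g. modprobe mymod my_timeout=120.
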
*/ +/* + * Number of daemons in each thread pool which is percpt, + * we will estimate reasonable value based on CPUs if it's not set. + */ static unsigned int nscheds; module_param(nscheds, int, 0444); MODULE_PARM_DESC(nscheds, "# scheduler daemons in each pool while starting"); @@ -72,7 +71,7 @@ static int typed_conns = 1; module_param(typed_conns, int, 0444); MODULE_PARM_DESC(typed_conns, "use different sockets for bulk"); -static int min_bulk = 1<<10; +static int min_bulk = 1 << 10; module_param(min_bulk, int, 0644); MODULE_PARM_DESC(min_bulk, "smallest 'large' message"); diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c index 986bce4c9..32cc31e4c 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c @@ -19,9 +19,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include "socklnd.h" @@ -56,15 +53,14 @@ ksocknal_next_tx_carrier(ksock_conn_t *conn) /* Called holding BH lock: conn->ksnc_scheduler->kss_lock */ LASSERT(!list_empty(&conn->ksnc_tx_queue)); - LASSERT(tx != NULL); + LASSERT(tx); /* Next TX that can carry ZC-ACK or LNet message */ if (tx->tx_list.next == &conn->ksnc_tx_queue) { /* no more packets queued */ conn->ksnc_tx_carrier = NULL; } else { - conn->ksnc_tx_carrier = list_entry(tx->tx_list.next, - ksock_tx_t, tx_list); + conn->ksnc_tx_carrier = list_next_entry(tx, tx_list); LASSERT(conn->ksnc_tx_carrier->tx_msg.ksm_type == tx->tx_msg.ksm_type); } } @@ -75,8 +71,8 @@ ksocknal_queue_tx_zcack_v2(ksock_conn_t *conn, { ksock_tx_t *tx = conn->ksnc_tx_carrier; - LASSERT(tx_ack == NULL || - tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP); + LASSERT(!tx_ack || + tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP); /* * Enqueue or piggyback tx_ack / cookie @@ -85,10 +81,10 @@ ksocknal_queue_tx_zcack_v2(ksock_conn_t *conn, * . There is tx can piggyback cookie of tx_ack (or cookie), * piggyback the cookie and return the tx. */ - if (tx == NULL) { - if (tx_ack != NULL) { + if (!tx) { + if (tx_ack) { list_add_tail(&tx_ack->tx_list, - &conn->ksnc_tx_queue); + &conn->ksnc_tx_queue); conn->ksnc_tx_carrier = tx_ack; } return 0; @@ -96,16 +92,16 @@ ksocknal_queue_tx_zcack_v2(ksock_conn_t *conn, if (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP) { /* tx is noop zc-ack, can't piggyback zc-ack cookie */ - if (tx_ack != NULL) + if (tx_ack) list_add_tail(&tx_ack->tx_list, - &conn->ksnc_tx_queue); + &conn->ksnc_tx_queue); return 0; } LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_LNET); - LASSERT(tx->tx_msg.ksm_zc_cookies[1] == 0); + LASSERT(!tx->tx_msg.ksm_zc_cookies[1]); - if (tx_ack != NULL) + if (tx_ack) cookie = tx_ack->tx_msg.ksm_zc_cookies[1]; /* piggyback the zc-ack cookie */ @@ -128,7 +124,7 @@ ksocknal_queue_tx_msg_v2(ksock_conn_t *conn, ksock_tx_t *tx_msg) * . If there is NOOP on the connection, piggyback the cookie * and replace the NOOP tx, and return the NOOP tx. 
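
One cleanup just above swaps the open-coded list_entry(tx->tx_list.next, ksock_tx_t, tx_list) for the equivalent list_next_entry(tx, tx_list), which hides the ->next dereference. Generic usage, with an illustrative struct:

#include <linux/list.h>

struct item {
        int val;
        struct list_head link;
};

/* step from one entry to its successor on the same list; the caller
 * must already know the successor is a real entry, not the head */
static struct item *next_item(struct item *it)
{
        return list_next_entry(it, link);
}
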
*/ - if (tx == NULL) { /* nothing on queue */ + if (!tx) { /* nothing on queue */ list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue); conn->ksnc_tx_carrier = tx_msg; return NULL; @@ -162,22 +158,22 @@ ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn, return ksocknal_queue_tx_zcack_v2(conn, tx_ack, cookie); /* non-blocking ZC-ACK (to router) */ - LASSERT(tx_ack == NULL || - tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP); + LASSERT(!tx_ack || + tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP); tx = conn->ksnc_tx_carrier; - if (tx == NULL) { - if (tx_ack != NULL) { + if (!tx) { + if (tx_ack) { list_add_tail(&tx_ack->tx_list, - &conn->ksnc_tx_queue); + &conn->ksnc_tx_queue); conn->ksnc_tx_carrier = tx_ack; } return 0; } - /* conn->ksnc_tx_carrier != NULL */ + /* conn->ksnc_tx_carrier */ - if (tx_ack != NULL) + if (tx_ack) cookie = tx_ack->tx_msg.ksm_zc_cookies[1]; if (cookie == SOCKNAL_KEEPALIVE_PING) /* ignore keepalive PING */ @@ -185,7 +181,7 @@ ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn, if (tx->tx_msg.ksm_zc_cookies[1] == SOCKNAL_KEEPALIVE_PING) { /* replace the keepalive PING with a real ACK */ - LASSERT(tx->tx_msg.ksm_zc_cookies[0] == 0); + LASSERT(!tx->tx_msg.ksm_zc_cookies[0]); tx->tx_msg.ksm_zc_cookies[1] = cookie; return 1; } @@ -197,7 +193,7 @@ ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn, return 1; /* XXX return error in the future */ } - if (tx->tx_msg.ksm_zc_cookies[0] == 0) { + if (!tx->tx_msg.ksm_zc_cookies[0]) { /* NOOP tx has only one ZC-ACK cookie, can carry at least one more */ if (tx->tx_msg.ksm_zc_cookies[1] > cookie) { tx->tx_msg.ksm_zc_cookies[0] = tx->tx_msg.ksm_zc_cookies[1]; @@ -233,7 +229,7 @@ ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn, tmp = tx->tx_msg.ksm_zc_cookies[0]; } - if (tmp != 0) { + if (tmp) { /* range of cookies */ tx->tx_msg.ksm_zc_cookies[0] = tmp - 1; tx->tx_msg.ksm_zc_cookies[1] = tmp + 1; @@ -261,7 +257,7 @@ ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn, } /* failed to piggyback ZC-ACK */ - if (tx_ack != NULL) { + if (tx_ack) { list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue); /* the next tx can piggyback at least 1 ACK */ ksocknal_next_tx_carrier(conn); @@ -280,7 +276,7 @@ ksocknal_match_tx(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk) return SOCKNAL_MATCH_YES; #endif - if (tx == NULL || tx->tx_lnetmsg == NULL) { + if (!tx || !tx->tx_lnetmsg) { /* noop packet */ nob = offsetof(ksock_msg_t, ksm_u); } else { @@ -319,7 +315,7 @@ ksocknal_match_tx_v3(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk) { int nob; - if (tx == NULL || tx->tx_lnetmsg == NULL) + if (!tx || !tx->tx_lnetmsg) nob = offsetof(ksock_msg_t, ksm_u); else nob = tx->tx_lnetmsg->msg_len + sizeof(ksock_msg_t); @@ -334,7 +330,7 @@ ksocknal_match_tx_v3(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk) case SOCKLND_CONN_ACK: if (nonblk) return SOCKNAL_MATCH_YES; - else if (tx == NULL || tx->tx_lnetmsg == NULL) + else if (!tx || !tx->tx_lnetmsg) return SOCKNAL_MATCH_MAY; else return SOCKNAL_MATCH_NO; @@ -369,10 +365,10 @@ ksocknal_handle_zcreq(ksock_conn_t *c, __u64 cookie, int remote) read_lock(&ksocknal_data.ksnd_global_lock); conn = ksocknal_find_conn_locked(peer, NULL, !!remote); - if (conn != NULL) { + if (conn) { ksock_sched_t *sched = conn->ksnc_scheduler; - LASSERT(conn->ksnc_proto->pro_queue_tx_zcack != NULL); + LASSERT(conn->ksnc_proto->pro_queue_tx_zcack); spin_lock_bh(&sched->kss_lock); @@ -390,11 +386,11 @@ ksocknal_handle_zcreq(ksock_conn_t *c, __u64 cookie, int remote) /* ACK connection is not ready, or can't piggyback the ACK */ tx = ksocknal_alloc_tx_noop(cookie, !!remote); 
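
ksocknal_handle_zcack(), coming up next, decodes the two-cookie convention the v3 queueing code above builds: cookies[0] == 0 acknowledges just cookies[1]; cookies[0] > cookies[1] acknowledges exactly those two distinct cookies; otherwise the pair is the inclusive range [cookies[0], cookies[1]]. The count it derives, restated as a standalone check with illustrative names:

#include <stdio.h>

static unsigned long long count_acked(unsigned long long c0,
                                      unsigned long long c1)
{
        if (!c0)                        /* single cookie in c1 */
                return 1;
        return (c0 > c1) ? 2 : (c1 - c0 + 1);
}

int main(void)
{
        printf("%llu\n", count_acked(0, 7));    /* 1 */
        printf("%llu\n", count_acked(9, 3));    /* 2: cookies 9 and 3 */
        printf("%llu\n", count_acked(3, 9));    /* 7: range 3..9 */
        return 0;
}
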
- if (tx == NULL) + if (!tx) return -ENOMEM; rc = ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id); - if (rc == 0) + if (!rc) return 0; ksocknal_free_tx(tx); @@ -407,11 +403,12 @@ ksocknal_handle_zcack(ksock_conn_t *conn, __u64 cookie1, __u64 cookie2) { ksock_peer_t *peer = conn->ksnc_peer; ksock_tx_t *tx; + ksock_tx_t *temp; ksock_tx_t *tmp; LIST_HEAD(zlist); int count; - if (cookie1 == 0) + if (!cookie1) cookie1 = cookie2; count = (cookie1 > cookie2) ? 2 : (cookie2 - cookie1 + 1); @@ -424,8 +421,8 @@ ksocknal_handle_zcack(ksock_conn_t *conn, __u64 cookie1, __u64 cookie2) spin_lock(&peer->ksnp_lock); - list_for_each_entry_safe(tx, tmp, - &peer->ksnp_zc_req_list, tx_zc_list) { + list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list, + tx_zc_list) { __u64 c = tx->tx_msg.ksm_zc_cookies[0]; if (c == cookie1 || c == cookie2 || (cookie1 < c && c < cookie2)) { @@ -433,20 +430,19 @@ ksocknal_handle_zcack(ksock_conn_t *conn, __u64 cookie1, __u64 cookie2) list_del(&tx->tx_zc_list); list_add(&tx->tx_zc_list, &zlist); - if (--count == 0) + if (!--count) break; } } spin_unlock(&peer->ksnp_lock); - while (!list_empty(&zlist)) { - tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list); + list_for_each_entry_safe(tx, temp, &zlist, tx_zc_list) { list_del(&tx->tx_zc_list); ksocknal_tx_decref(tx); } - return count == 0 ? 0 : -EPROTO; + return !count ? 0 : -EPROTO; } static int @@ -461,58 +457,59 @@ ksocknal_send_hello_v1(ksock_conn_t *conn, ksock_hello_msg_t *hello) CLASSERT(sizeof(lnet_magicversion_t) == offsetof(lnet_hdr_t, src_nid)); LIBCFS_ALLOC(hdr, sizeof(*hdr)); - if (hdr == NULL) { + if (!hdr) { CERROR("Can't allocate lnet_hdr_t\n"); return -ENOMEM; } hmv = (lnet_magicversion_t *)&hdr->dest_nid; - /* Re-organize V2.x message header to V1.x (lnet_hdr_t) - * header and send out */ - hmv->magic = cpu_to_le32 (LNET_PROTO_TCP_MAGIC); - hmv->version_major = cpu_to_le16 (KSOCK_PROTO_V1_MAJOR); - hmv->version_minor = cpu_to_le16 (KSOCK_PROTO_V1_MINOR); + /* + * Re-organize V2.x message header to V1.x (lnet_hdr_t) + * header and send out + */ + hmv->magic = cpu_to_le32(LNET_PROTO_TCP_MAGIC); + hmv->version_major = cpu_to_le16(KSOCK_PROTO_V1_MAJOR); + hmv->version_minor = cpu_to_le16(KSOCK_PROTO_V1_MINOR); - if (the_lnet.ln_testprotocompat != 0) { + if (the_lnet.ln_testprotocompat) { /* single-shot proto check */ LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 1) != 0) { + if (the_lnet.ln_testprotocompat & 1) { hmv->version_major++; /* just different! 
*/ the_lnet.ln_testprotocompat &= ~1; } - if ((the_lnet.ln_testprotocompat & 2) != 0) { + if (the_lnet.ln_testprotocompat & 2) { hmv->magic = LNET_PROTO_MAGIC; the_lnet.ln_testprotocompat &= ~2; } LNET_UNLOCK(); } - hdr->src_nid = cpu_to_le64 (hello->kshm_src_nid); - hdr->src_pid = cpu_to_le32 (hello->kshm_src_pid); - hdr->type = cpu_to_le32 (LNET_MSG_HELLO); - hdr->payload_length = cpu_to_le32 (hello->kshm_nips * sizeof(__u32)); - hdr->msg.hello.type = cpu_to_le32 (hello->kshm_ctype); - hdr->msg.hello.incarnation = cpu_to_le64 (hello->kshm_src_incarnation); + hdr->src_nid = cpu_to_le64(hello->kshm_src_nid); + hdr->src_pid = cpu_to_le32(hello->kshm_src_pid); + hdr->type = cpu_to_le32(LNET_MSG_HELLO); + hdr->payload_length = cpu_to_le32(hello->kshm_nips * sizeof(__u32)); + hdr->msg.hello.type = cpu_to_le32(hello->kshm_ctype); + hdr->msg.hello.incarnation = cpu_to_le64(hello->kshm_src_incarnation); rc = lnet_sock_write(sock, hdr, sizeof(*hdr), lnet_acceptor_timeout()); - if (rc != 0) { + if (rc) { CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n", rc, &conn->ksnc_ipaddr, conn->ksnc_port); goto out; } - if (hello->kshm_nips == 0) + if (!hello->kshm_nips) goto out; - for (i = 0; i < (int) hello->kshm_nips; i++) { - hello->kshm_ips[i] = __cpu_to_le32 (hello->kshm_ips[i]); - } + for (i = 0; i < (int) hello->kshm_nips; i++) + hello->kshm_ips[i] = __cpu_to_le32(hello->kshm_ips[i]); rc = lnet_sock_write(sock, hello->kshm_ips, hello->kshm_nips * sizeof(__u32), lnet_acceptor_timeout()); - if (rc != 0) { + if (rc) { CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n", rc, hello->kshm_nips, &conn->ksnc_ipaddr, conn->ksnc_port); @@ -532,10 +529,10 @@ ksocknal_send_hello_v2(ksock_conn_t *conn, ksock_hello_msg_t *hello) hello->kshm_magic = LNET_PROTO_MAGIC; hello->kshm_version = conn->ksnc_proto->pro_version; - if (the_lnet.ln_testprotocompat != 0) { + if (the_lnet.ln_testprotocompat) { /* single-shot proto check */ LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 1) != 0) { + if (the_lnet.ln_testprotocompat & 1) { hello->kshm_version++; /* just different! 
*/ the_lnet.ln_testprotocompat &= ~1; } @@ -544,19 +541,19 @@ ksocknal_send_hello_v2(ksock_conn_t *conn, ksock_hello_msg_t *hello) rc = lnet_sock_write(sock, hello, offsetof(ksock_hello_msg_t, kshm_ips), lnet_acceptor_timeout()); - if (rc != 0) { + if (rc) { CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n", rc, &conn->ksnc_ipaddr, conn->ksnc_port); return rc; } - if (hello->kshm_nips == 0) + if (!hello->kshm_nips) return 0; rc = lnet_sock_write(sock, hello->kshm_ips, hello->kshm_nips * sizeof(__u32), lnet_acceptor_timeout()); - if (rc != 0) { + if (rc) { CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n", rc, hello->kshm_nips, &conn->ksnc_ipaddr, conn->ksnc_port); @@ -575,7 +572,7 @@ ksocknal_recv_hello_v1(ksock_conn_t *conn, ksock_hello_msg_t *hello, int i; LIBCFS_ALLOC(hdr, sizeof(*hdr)); - if (hdr == NULL) { + if (!hdr) { CERROR("Can't allocate lnet_hdr_t\n"); return -ENOMEM; } @@ -583,15 +580,15 @@ ksocknal_recv_hello_v1(ksock_conn_t *conn, ksock_hello_msg_t *hello, rc = lnet_sock_read(sock, &hdr->src_nid, sizeof(*hdr) - offsetof(lnet_hdr_t, src_nid), timeout); - if (rc != 0) { + if (rc) { CERROR("Error %d reading rest of HELLO hdr from %pI4h\n", - rc, &conn->ksnc_ipaddr); + rc, &conn->ksnc_ipaddr); LASSERT(rc < 0 && rc != -EALREADY); goto out; } /* ...and check we got what we expected */ - if (hdr->type != cpu_to_le32 (LNET_MSG_HELLO)) { + if (hdr->type != cpu_to_le32(LNET_MSG_HELLO)) { CERROR("Expecting a HELLO hdr, but got type %d from %pI4h\n", le32_to_cpu(hdr->type), &conn->ksnc_ipaddr); @@ -613,14 +610,14 @@ ksocknal_recv_hello_v1(ksock_conn_t *conn, ksock_hello_msg_t *hello, goto out; } - if (hello->kshm_nips == 0) + if (!hello->kshm_nips) goto out; rc = lnet_sock_read(sock, hello->kshm_ips, hello->kshm_nips * sizeof(__u32), timeout); - if (rc != 0) { + if (rc) { CERROR("Error %d reading IPs from ip %pI4h\n", - rc, &conn->ksnc_ipaddr); + rc, &conn->ksnc_ipaddr); LASSERT(rc < 0 && rc != -EALREADY); goto out; } @@ -628,7 +625,7 @@ ksocknal_recv_hello_v1(ksock_conn_t *conn, ksock_hello_msg_t *hello, for (i = 0; i < (int) hello->kshm_nips; i++) { hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]); - if (hello->kshm_ips[i] == 0) { + if (!hello->kshm_ips[i]) { CERROR("Zero IP[%d] from ip %pI4h\n", i, &conn->ksnc_ipaddr); rc = -EPROTO; @@ -657,9 +654,9 @@ ksocknal_recv_hello_v2(ksock_conn_t *conn, ksock_hello_msg_t *hello, int timeout offsetof(ksock_hello_msg_t, kshm_ips) - offsetof(ksock_hello_msg_t, kshm_src_nid), timeout); - if (rc != 0) { + if (rc) { CERROR("Error %d reading HELLO from %pI4h\n", - rc, &conn->ksnc_ipaddr); + rc, &conn->ksnc_ipaddr); LASSERT(rc < 0 && rc != -EALREADY); return rc; } @@ -681,14 +678,14 @@ ksocknal_recv_hello_v2(ksock_conn_t *conn, ksock_hello_msg_t *hello, int timeout return -EPROTO; } - if (hello->kshm_nips == 0) + if (!hello->kshm_nips) return 0; rc = lnet_sock_read(sock, hello->kshm_ips, hello->kshm_nips * sizeof(__u32), timeout); - if (rc != 0) { + if (rc) { CERROR("Error %d reading IPs from ip %pI4h\n", - rc, &conn->ksnc_ipaddr); + rc, &conn->ksnc_ipaddr); LASSERT(rc < 0 && rc != -EALREADY); return rc; } @@ -697,7 +694,7 @@ ksocknal_recv_hello_v2(ksock_conn_t *conn, ksock_hello_msg_t *hello, int timeout if (conn->ksnc_flip) __swab32s(&hello->kshm_ips[i]); - if (hello->kshm_ips[i] == 0) { + if (!hello->kshm_ips[i]) { CERROR("Zero IP[%d] from ip %pI4h\n", i, &conn->ksnc_ipaddr); return -EPROTO; @@ -712,12 +709,13 @@ ksocknal_pack_msg_v1(ksock_tx_t *tx) { /* V1.x has no KSOCK_MSG_NOOP */ LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); 
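
The pack_msg hunks that follow mechanically split chained assignments such as tx->tx_resid = tx->tx_nob = len into one store per statement, which is what checkpatch's "multiple assignments should be avoided" check asks for. The transformation in miniature, on an illustrative struct:

#include <stddef.h>

struct tx { size_t nob, resid; };

static void pack_chained(struct tx *tx, size_t len)
{
        tx->resid = tx->nob = len;      /* before: one chained statement */
}

static void pack_split(struct tx *tx, size_t len)
{
        tx->nob = len;                  /* after: same stores, split up */
        tx->resid = len;
}
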
- LASSERT(tx->tx_lnetmsg != NULL); + LASSERT(tx->tx_lnetmsg); tx->tx_iov[0].iov_base = &tx->tx_lnetmsg->msg_hdr; tx->tx_iov[0].iov_len = sizeof(lnet_hdr_t); - tx->tx_resid = tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(lnet_hdr_t); + tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(lnet_hdr_t); + tx->tx_resid = tx->tx_lnetmsg->msg_len + sizeof(lnet_hdr_t); } static void @@ -725,17 +723,19 @@ ksocknal_pack_msg_v2(ksock_tx_t *tx) { tx->tx_iov[0].iov_base = &tx->tx_msg; - if (tx->tx_lnetmsg != NULL) { + if (tx->tx_lnetmsg) { LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr; tx->tx_iov[0].iov_len = sizeof(ksock_msg_t); - tx->tx_resid = tx->tx_nob = sizeof(ksock_msg_t) + tx->tx_lnetmsg->msg_len; + tx->tx_nob = sizeof(ksock_msg_t) + tx->tx_lnetmsg->msg_len; + tx->tx_resid = sizeof(ksock_msg_t) + tx->tx_lnetmsg->msg_len; } else { LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP); tx->tx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr); - tx->tx_resid = tx->tx_nob = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr); + tx->tx_nob = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr); + tx->tx_resid = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr); } /* Don't checksum before start sending, because packet can be piggybacked with ACK */ } @@ -745,7 +745,8 @@ ksocknal_unpack_msg_v1(ksock_msg_t *msg) { msg->ksm_csum = 0; msg->ksm_type = KSOCK_MSG_LNET; - msg->ksm_zc_cookies[0] = msg->ksm_zc_cookies[1] = 0; + msg->ksm_zc_cookies[0] = 0; + msg->ksm_zc_cookies[1] = 0; } static void diff --git a/drivers/staging/lustre/lnet/libcfs/Makefile b/drivers/staging/lustre/lnet/libcfs/Makefile new file mode 100644 index 000000000..8c8945545 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/Makefile @@ -0,0 +1,17 @@ +obj-$(CONFIG_LNET) += libcfs.o + +libcfs-linux-objs := linux-tracefile.o linux-debug.o +libcfs-linux-objs += linux-prim.o linux-cpu.o +libcfs-linux-objs += linux-curproc.o +libcfs-linux-objs += linux-module.o +libcfs-linux-objs += linux-crypto.o +libcfs-linux-objs += linux-crypto-adler.o +libcfs-linux-objs += linux-mem.o + +libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs)) + +libcfs-all-objs := debug.o fail.o module.o tracefile.o \ + libcfs_string.o hash.o prng.o workitem.o \ + libcfs_cpu.o libcfs_mem.o libcfs_lock.o + +libcfs-objs := $(libcfs-linux-objs) $(libcfs-all-objs) diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c new file mode 100644 index 000000000..c3d628bac --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/debug.c @@ -0,0 +1,560 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/libcfs/debug.c + * + * Author: Phil Schwan <phil@clusterfs.com> + * + */ + +# define DEBUG_SUBSYSTEM S_LNET + +#include "../../include/linux/libcfs/libcfs.h" +#include "tracefile.h" + +static char debug_file_name[1024]; + +unsigned int libcfs_subsystem_debug = ~0; +EXPORT_SYMBOL(libcfs_subsystem_debug); +module_param(libcfs_subsystem_debug, int, 0644); +MODULE_PARM_DESC(libcfs_subsystem_debug, "Lustre kernel debug subsystem mask"); + +unsigned int libcfs_debug = (D_CANTMASK | + D_NETERROR | D_HA | D_CONFIG | D_IOCTL); +EXPORT_SYMBOL(libcfs_debug); +module_param(libcfs_debug, int, 0644); +MODULE_PARM_DESC(libcfs_debug, "Lustre kernel debug mask"); + +static int libcfs_param_debug_mb_set(const char *val, + const struct kernel_param *kp) +{ + int rc; + unsigned num; + + rc = kstrtouint(val, 0, &num); + if (rc < 0) + return rc; + + if (!*((unsigned int *)kp->arg)) { + *((unsigned int *)kp->arg) = num; + return 0; + } + + rc = cfs_trace_set_debug_mb(num); + + if (!rc) + *((unsigned int *)kp->arg) = cfs_trace_get_debug_mb(); + + return rc; +} + +/* While debug_mb setting look like unsigned int, in fact + * it needs quite a bunch of extra processing, so we define special + * debugmb parameter type with corresponding methods to handle this case + */ +static struct kernel_param_ops param_ops_debugmb = { + .set = libcfs_param_debug_mb_set, + .get = param_get_uint, +}; + +#define param_check_debugmb(name, p) \ + __param_check(name, p, unsigned int) + +static unsigned int libcfs_debug_mb; +module_param(libcfs_debug_mb, debugmb, 0644); +MODULE_PARM_DESC(libcfs_debug_mb, "Total debug buffer size."); + +unsigned int libcfs_printk = D_CANTMASK; +module_param(libcfs_printk, uint, 0644); +MODULE_PARM_DESC(libcfs_printk, "Lustre kernel debug console mask"); + +unsigned int libcfs_console_ratelimit = 1; +module_param(libcfs_console_ratelimit, uint, 0644); +MODULE_PARM_DESC(libcfs_console_ratelimit, "Lustre kernel debug console ratelimit (0 to disable)"); + +static int param_set_delay_minmax(const char *val, + const struct kernel_param *kp, + long min, long max) +{ + long d; + int sec; + int rc; + + rc = kstrtoint(val, 0, &sec); + if (rc) + return -EINVAL; + + d = cfs_time_seconds(sec) / 100; + if (d < min || d > max) + return -EINVAL; + + *((unsigned int *)kp->arg) = d; + + return 0; +} + +static int param_get_delay(char *buffer, const struct kernel_param *kp) +{ + unsigned int d = *(unsigned int *)kp->arg; + + return sprintf(buffer, "%u", (unsigned int)cfs_duration_sec(d * 100)); +} + +unsigned int libcfs_console_max_delay; +unsigned int libcfs_console_min_delay; + +static int param_set_console_max_delay(const char *val, + const struct kernel_param *kp) +{ + return param_set_delay_minmax(val, kp, + libcfs_console_min_delay, INT_MAX); +} + +static struct kernel_param_ops param_ops_console_max_delay = { + .set = 
param_set_console_max_delay, + .get = param_get_delay, +}; + +#define param_check_console_max_delay(name, p) \ + __param_check(name, p, unsigned int) + +module_param(libcfs_console_max_delay, console_max_delay, 0644); +MODULE_PARM_DESC(libcfs_console_max_delay, "Lustre kernel debug console max delay (jiffies)"); + +static int param_set_console_min_delay(const char *val, + const struct kernel_param *kp) +{ + return param_set_delay_minmax(val, kp, + 1, libcfs_console_max_delay); +} + +static struct kernel_param_ops param_ops_console_min_delay = { + .set = param_set_console_min_delay, + .get = param_get_delay, +}; + +#define param_check_console_min_delay(name, p) \ + __param_check(name, p, unsigned int) + +module_param(libcfs_console_min_delay, console_min_delay, 0644); +MODULE_PARM_DESC(libcfs_console_min_delay, "Lustre kernel debug console min delay (jiffies)"); + +static int param_set_uint_minmax(const char *val, + const struct kernel_param *kp, + unsigned int min, unsigned int max) +{ + unsigned int num; + int ret; + + if (!val) + return -EINVAL; + ret = kstrtouint(val, 0, &num); + if (ret < 0 || num < min || num > max) + return -EINVAL; + *((unsigned int *)kp->arg) = num; + return 0; +} + +static int param_set_uintpos(const char *val, const struct kernel_param *kp) +{ + return param_set_uint_minmax(val, kp, 1, -1); +} + +static struct kernel_param_ops param_ops_uintpos = { + .set = param_set_uintpos, + .get = param_get_uint, +}; + +#define param_check_uintpos(name, p) \ + __param_check(name, p, unsigned int) + +unsigned int libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF; +module_param(libcfs_console_backoff, uintpos, 0644); +MODULE_PARM_DESC(libcfs_console_backoff, "Lustre kernel debug console backoff factor"); + +unsigned int libcfs_debug_binary = 1; + +unsigned int libcfs_stack = 3 * THREAD_SIZE / 4; +EXPORT_SYMBOL(libcfs_stack); + +unsigned int libcfs_catastrophe; +EXPORT_SYMBOL(libcfs_catastrophe); + +unsigned int libcfs_panic_on_lbug = 1; +module_param(libcfs_panic_on_lbug, uint, 0644); +MODULE_PARM_DESC(libcfs_panic_on_lbug, "Lustre kernel panic on LBUG"); + +static wait_queue_head_t debug_ctlwq; + +char libcfs_debug_file_path_arr[PATH_MAX] = LIBCFS_DEBUG_FILE_PATH_DEFAULT; + +/* We need to pass a pointer here, but elsewhere this must be a const */ +static char *libcfs_debug_file_path; +module_param(libcfs_debug_file_path, charp, 0644); +MODULE_PARM_DESC(libcfs_debug_file_path, + "Path for dumping debug logs, set 'NONE' to prevent log dumping"); + +int libcfs_panic_in_progress; + +/* libcfs_debug_token2mask() expects the returned string in lower-case */ +static const char * +libcfs_debug_subsys2str(int subsys) +{ + switch (1 << subsys) { + default: + return NULL; + case S_UNDEFINED: + return "undefined"; + case S_MDC: + return "mdc"; + case S_MDS: + return "mds"; + case S_OSC: + return "osc"; + case S_OST: + return "ost"; + case S_CLASS: + return "class"; + case S_LOG: + return "log"; + case S_LLITE: + return "llite"; + case S_RPC: + return "rpc"; + case S_LNET: + return "lnet"; + case S_LND: + return "lnd"; + case S_PINGER: + return "pinger"; + case S_FILTER: + return "filter"; + case S_ECHO: + return "echo"; + case S_LDLM: + return "ldlm"; + case S_LOV: + return "lov"; + case S_LQUOTA: + return "lquota"; + case S_OSD: + return "osd"; + case S_LFSCK: + return "lfsck"; + case S_LMV: + return "lmv"; + case S_SEC: + return "sec"; + case S_GSS: + return "gss"; + case S_MGC: + return "mgc"; + case S_MGS: + return "mgs"; + case S_FID: + return "fid"; + case S_FLD: + return "fld"; + } 
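
param_set_uint_minmax() above is the generic building block: parse the value, then reject anything outside [min, max]; param_set_uintpos() simply instantiates it as [1, UINT_MAX]. Here is a standalone C sketch of the same pattern, with strtoul() standing in for kstrtouint().

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/* Parse an unsigned int and range-check it against [min, max],
 * mirroring param_set_uint_minmax() above. */
static int set_uint_minmax(const char *val, unsigned int *out,
                           unsigned int min, unsigned int max)
{
        char *end;
        unsigned long num;

        if (!val)
                return -EINVAL;
        errno = 0;
        num = strtoul(val, &end, 0);
        if (errno || *end || num < min || num > max)
                return -EINVAL;
        *out = (unsigned int)num;
        return 0;
}

int main(void)
{
        unsigned int backoff;

        /* "uintpos": any positive value, i.e. [1, UINT_MAX] */
        if (!set_uint_minmax("3", &backoff, 1, UINT_MAX))
                printf("backoff=%u\n", backoff);
        return 0;
}
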
+} + +/* libcfs_debug_token2mask() expects the returned string in lower-case */ +static const char * +libcfs_debug_dbg2str(int debug) +{ + switch (1 << debug) { + default: + return NULL; + case D_TRACE: + return "trace"; + case D_INODE: + return "inode"; + case D_SUPER: + return "super"; + case D_EXT2: + return "ext2"; + case D_MALLOC: + return "malloc"; + case D_CACHE: + return "cache"; + case D_INFO: + return "info"; + case D_IOCTL: + return "ioctl"; + case D_NETERROR: + return "neterror"; + case D_NET: + return "net"; + case D_WARNING: + return "warning"; + case D_BUFFS: + return "buffs"; + case D_OTHER: + return "other"; + case D_DENTRY: + return "dentry"; + case D_NETTRACE: + return "nettrace"; + case D_PAGE: + return "page"; + case D_DLMTRACE: + return "dlmtrace"; + case D_ERROR: + return "error"; + case D_EMERG: + return "emerg"; + case D_HA: + return "ha"; + case D_RPCTRACE: + return "rpctrace"; + case D_VFSTRACE: + return "vfstrace"; + case D_READA: + return "reada"; + case D_MMAP: + return "mmap"; + case D_CONFIG: + return "config"; + case D_CONSOLE: + return "console"; + case D_QUOTA: + return "quota"; + case D_SEC: + return "sec"; + case D_LFSCK: + return "lfsck"; + } +} + +int +libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys) +{ + const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str : + libcfs_debug_dbg2str; + int len = 0; + const char *token; + int i; + + if (mask == 0) { /* "0" */ + if (size > 0) + str[0] = '0'; + len = 1; + } else { /* space-separated tokens */ + for (i = 0; i < 32; i++) { + if ((mask & (1 << i)) == 0) + continue; + + token = fn(i); + if (!token) /* unused bit */ + continue; + + if (len > 0) { /* separator? */ + if (len < size) + str[len] = ' '; + len++; + } + + while (*token != 0) { + if (len < size) + str[len] = *token; + token++; + len++; + } + } + } + + /* terminate 'str' */ + if (len < size) + str[len] = 0; + else + str[size - 1] = 0; + + return len; +} + +int +libcfs_debug_str2mask(int *mask, const char *str, int is_subsys) +{ + const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str : + libcfs_debug_dbg2str; + int m = 0; + int matched; + int n; + int t; + + /* Allow a number for backwards compatibility */ + + for (n = strlen(str); n > 0; n--) + if (!isspace(str[n - 1])) + break; + matched = n; + t = sscanf(str, "%i%n", &m, &matched); + if (t >= 1 && matched == n) { + /* don't print warning for lctl set_param debug=0 or -1 */ + if (m != 0 && m != -1) + CWARN("You are trying to use a numerical value for the mask - this will be deprecated in a future release.\n"); + *mask = m; + return 0; + } + + return cfs_str2mask(str, fn, mask, is_subsys ? 
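
libcfs_debug_mask2str() walks all 32 bits, skips unnamed ones, and joins the token for each set bit with single spaces, emitting "0" for an empty mask. The sketch below reproduces that shape in userspace; the four example token names are placeholders, not the real subsystem table.

#include <stdio.h>

static const char *bit2str(unsigned int bit)
{
        static const char * const names[] = { "trace", "inode", "super",
                                              "malloc" };

        return bit < 4 ? names[bit] : NULL; /* unused bits map to NULL */
}

/* Emit space-separated token names for every set, named bit; plain
 * "0" for an empty mask -- the same shape as libcfs_debug_mask2str(). */
static int mask2str(char *str, int size, unsigned int mask)
{
        int len = 0;
        unsigned int i;

        if (!mask)
                return snprintf(str, size, "0");

        for (i = 0; i < 32 && len < size; i++) {
                const char *tok = bit2str(i);

                if (!(mask & (1u << i)) || !tok)
                        continue;
                len += snprintf(str + len, size - len, "%s%s",
                                len ? " " : "", tok);
        }
        return len;
}

int main(void)
{
        char buf[64];

        mask2str(buf, sizeof(buf), (1u << 0) | (1u << 3));
        printf("%s\n", buf); /* -> "trace malloc" */
        return 0;
}
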
0 : D_CANTMASK, + 0xffffffff); +} + +/** + * Dump Lustre log to ::debug_file_path by calling tracefile_dump_all_pages() + */ +void libcfs_debug_dumplog_internal(void *arg) +{ + void *journal_info; + + journal_info = current->journal_info; + current->journal_info = NULL; + + if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) != 0) { + snprintf(debug_file_name, sizeof(debug_file_name) - 1, + "%s.%lld.%ld", libcfs_debug_file_path_arr, + (s64)ktime_get_real_seconds(), (long_ptr_t)arg); + pr_alert("LustreError: dumping log to %s\n", debug_file_name); + cfs_tracefile_dump_all_pages(debug_file_name); + libcfs_run_debug_log_upcall(debug_file_name); + } + + current->journal_info = journal_info; +} + +static int libcfs_debug_dumplog_thread(void *arg) +{ + libcfs_debug_dumplog_internal(arg); + wake_up(&debug_ctlwq); + return 0; +} + +void libcfs_debug_dumplog(void) +{ + wait_queue_t wait; + struct task_struct *dumper; + + /* we're being careful to ensure that the kernel thread is + * able to set our state to running as it exits before we + * get to schedule() + */ + init_waitqueue_entry(&wait, current); + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&debug_ctlwq, &wait); + + dumper = kthread_run(libcfs_debug_dumplog_thread, + (void *)(long)current_pid(), + "libcfs_debug_dumper"); + if (IS_ERR(dumper)) + pr_err("LustreError: cannot start log dump thread: %ld\n", + PTR_ERR(dumper)); + else + schedule(); + + /* be sure to teardown if cfs_create_thread() failed */ + remove_wait_queue(&debug_ctlwq, &wait); + set_current_state(TASK_RUNNING); +} +EXPORT_SYMBOL(libcfs_debug_dumplog); + +int libcfs_debug_init(unsigned long bufsize) +{ + int rc = 0; + unsigned int max = libcfs_debug_mb; + + init_waitqueue_head(&debug_ctlwq); + + if (libcfs_console_max_delay <= 0 || /* not set by user or */ + libcfs_console_min_delay <= 0 || /* set to invalid values */ + libcfs_console_min_delay >= libcfs_console_max_delay) { + libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY; + libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY; + } + + if (libcfs_debug_file_path) { + strlcpy(libcfs_debug_file_path_arr, + libcfs_debug_file_path, + sizeof(libcfs_debug_file_path_arr)); + } + + /* If libcfs_debug_mb is set to an invalid value or uninitialized + * then just make the total buffers smp_num_cpus * TCD_MAX_PAGES + */ + if (max > cfs_trace_max_debug_mb() || max < num_possible_cpus()) { + max = TCD_MAX_PAGES; + } else { + max = max / num_possible_cpus(); + max <<= (20 - PAGE_SHIFT); + } + rc = cfs_tracefile_init(max); + + if (rc == 0) { + libcfs_register_panic_notifier(); + libcfs_debug_mb = cfs_trace_get_debug_mb(); + } + + return rc; +} + +int libcfs_debug_cleanup(void) +{ + libcfs_unregister_panic_notifier(); + cfs_tracefile_exit(); + return 0; +} + +int libcfs_debug_clear_buffer(void) +{ + cfs_trace_flush_pages(); + return 0; +} + +/* Debug markers, although printed by S_LNET should not be be marked as such. 
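
The buffer sizing in libcfs_debug_init() divides the configured megabyte budget across CPUs first and then converts megabytes to pages, i.e. one MB is 1 << (20 - PAGE_SHIFT) pages. A small arithmetic sketch of that conversion, assuming 4 KiB pages:

#include <stdio.h>

#define PAGE_SHIFT 12 /* assumption: 4 KiB pages */

/* Convert a total debug-buffer budget in MB into pages per CPU, as
 * libcfs_debug_init() does: divide by the CPU count first, then scale
 * MB -> pages. */
static unsigned int debug_mb_to_pages_per_cpu(unsigned int mb,
                                              unsigned int ncpus)
{
        return (mb / ncpus) << (20 - PAGE_SHIFT);
}

int main(void)
{
        /* 256 MB across 8 CPUs -> 32 MB/CPU -> 8192 4K pages per CPU */
        printf("%u\n", debug_mb_to_pages_per_cpu(256, 8));
        return 0;
}
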
*/ +#undef DEBUG_SUBSYSTEM +#define DEBUG_SUBSYSTEM S_UNDEFINED +int libcfs_debug_mark_buffer(const char *text) +{ + CDEBUG(D_TRACE, + "***************************************************\n"); + LCONSOLE(D_WARNING, "DEBUG MARKER: %s\n", text); + CDEBUG(D_TRACE, + "***************************************************\n"); + + return 0; +} + +#undef DEBUG_SUBSYSTEM +#define DEBUG_SUBSYSTEM S_LNET diff --git a/drivers/staging/lustre/lnet/libcfs/fail.c b/drivers/staging/lustre/lnet/libcfs/fail.c new file mode 100644 index 000000000..dadaf7685 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/fail.c @@ -0,0 +1,139 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please contact Oracle Corporation, Inc., 500 Oracle Parkway, Redwood Shores, + * CA 94065 USA or visit www.oracle.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2015, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Oracle Corporation, Inc. + */ + +#include "../../include/linux/libcfs/libcfs.h" + +unsigned long cfs_fail_loc; +EXPORT_SYMBOL(cfs_fail_loc); + +unsigned int cfs_fail_val; +EXPORT_SYMBOL(cfs_fail_val); + +DECLARE_WAIT_QUEUE_HEAD(cfs_race_waitq); +EXPORT_SYMBOL(cfs_race_waitq); + +int cfs_race_state; +EXPORT_SYMBOL(cfs_race_state); + +int __cfs_fail_check_set(__u32 id, __u32 value, int set) +{ + static atomic_t cfs_fail_count = ATOMIC_INIT(0); + + LASSERT(!(id & CFS_FAIL_ONCE)); + + if ((cfs_fail_loc & (CFS_FAILED | CFS_FAIL_ONCE)) == + (CFS_FAILED | CFS_FAIL_ONCE)) { + atomic_set(&cfs_fail_count, 0); /* paranoia */ + return 0; + } + + /* Fail 1/cfs_fail_val times */ + if (cfs_fail_loc & CFS_FAIL_RAND) { + if (cfs_fail_val < 2 || cfs_rand() % cfs_fail_val > 0) + return 0; + } + + /* Skip the first cfs_fail_val, then fail */ + if (cfs_fail_loc & CFS_FAIL_SKIP) { + if (atomic_inc_return(&cfs_fail_count) <= cfs_fail_val) + return 0; + } + + /* check cfs_fail_val... */ + if (set == CFS_FAIL_LOC_VALUE) { + if (cfs_fail_val != -1 && cfs_fail_val != value) + return 0; + } + + /* Fail cfs_fail_val times, overridden by FAIL_ONCE */ + if (cfs_fail_loc & CFS_FAIL_SOME && + (!(cfs_fail_loc & CFS_FAIL_ONCE) || cfs_fail_val <= 1)) { + int count = atomic_inc_return(&cfs_fail_count); + + if (count >= cfs_fail_val) { + set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc); + atomic_set(&cfs_fail_count, 0); + /* we are lost race to increase */ + if (count > cfs_fail_val) + return 0; + } + } + + if ((set == CFS_FAIL_LOC_ORSET || set == CFS_FAIL_LOC_RESET) && + (value & CFS_FAIL_ONCE)) + set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc); + /* Lost race to set CFS_FAILED_BIT. 
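
__cfs_fail_check_set() implements several trigger policies around the same counter: CFS_FAIL_RAND fires roughly once per cfs_fail_val calls, while CFS_FAIL_SKIP stays quiet for the first cfs_fail_val calls and fires afterwards. A self-contained sketch of just those two policies; the flag names and values here are illustrative, not the kernel's.

#include <stdio.h>
#include <stdlib.h>

#define FAIL_RAND 0x1u /* illustrative flag values */
#define FAIL_SKIP 0x2u

static unsigned int fail_mode;
static unsigned int fail_val;
static unsigned int fail_count;

/* Decide whether an injected failure fires: RAND fails ~1/fail_val
 * calls; SKIP skips the first fail_val calls, then always fails. */
static int fail_check(void)
{
        if (fail_mode & FAIL_RAND)
                return fail_val >= 2 && rand() % fail_val == 0;
        if (fail_mode & FAIL_SKIP)
                return ++fail_count > fail_val;
        return 0;
}

int main(void)
{
        int i, hits = 0;

        fail_mode = FAIL_SKIP;
        fail_val = 3;
        for (i = 0; i < 5; i++)
                hits += fail_check();
        printf("hits=%d\n", hits); /* first 3 calls skipped -> 2 */
        return 0;
}
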
*/ + if (test_and_set_bit(CFS_FAILED_BIT, &cfs_fail_loc)) { + /* If CFS_FAIL_ONCE is valid, only one process can fail, + * otherwise multi-process can fail at the same time. + */ + if (cfs_fail_loc & CFS_FAIL_ONCE) + return 0; + } + + switch (set) { + case CFS_FAIL_LOC_NOSET: + case CFS_FAIL_LOC_VALUE: + break; + case CFS_FAIL_LOC_ORSET: + cfs_fail_loc |= value & ~(CFS_FAILED | CFS_FAIL_ONCE); + break; + case CFS_FAIL_LOC_RESET: + cfs_fail_loc = value; + break; + default: + LASSERTF(0, "called with bad set %u\n", set); + break; + } + + return 1; +} +EXPORT_SYMBOL(__cfs_fail_check_set); + +int __cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set) +{ + int ret; + + ret = __cfs_fail_check_set(id, value, set); + if (ret && likely(ms > 0)) { + CERROR("cfs_fail_timeout id %x sleeping for %dms\n", + id, ms); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(cfs_time_seconds(ms) / 1000); + CERROR("cfs_fail_timeout id %x awake\n", id); + } + return ret; +} +EXPORT_SYMBOL(__cfs_fail_timeout_set); diff --git a/drivers/staging/lustre/lnet/libcfs/hash.c b/drivers/staging/lustre/lnet/libcfs/hash.c new file mode 100644 index 000000000..f60feb3a3 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/hash.c @@ -0,0 +1,2085 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/libcfs/hash.c + * + * Implement a hash class for hash process in lustre system. 
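
__cfs_fail_timeout_set() combines the check above with a forced stall: when the fail point fires, the caller sleeps for the requested number of milliseconds. A hedged userspace equivalent using nanosleep(); fail_check() is assumed to be the matching-point test from the previous sketch.

#include <stdio.h>
#include <time.h>

static int fail_check(void)
{
        return 1; /* assume the fail point matches and fires */
}

/* Sleep the caller for `ms` milliseconds when the point fires, in
 * the style of __cfs_fail_timeout_set(). */
static int fail_timeout(int ms)
{
        int ret = fail_check();

        if (ret && ms > 0) {
                struct timespec ts = { ms / 1000,
                                       (ms % 1000) * 1000000L };

                fprintf(stderr, "fail point sleeping for %dms\n", ms);
                nanosleep(&ts, NULL);
                fprintf(stderr, "fail point awake\n");
        }
        return ret;
}

int main(void)
{
        return fail_timeout(10) ? 0 : 1;
}
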
+ *
+ * Author: YuZhangyong <yzy@clusterfs.com>
+ *
+ * 2008-08-15: Brian Behlendorf <behlendorf1@llnl.gov>
+ * - Simplified API and improved documentation
+ * - Added per-hash feature flags:
+ *   * CFS_HASH_DEBUG additional validation
+ *   * CFS_HASH_REHASH dynamic rehashing
+ * - Added per-hash statistics
+ * - General performance enhancements
+ *
+ * 2009-07-31: Liang Zhen <zhen.liang@sun.com>
+ * - moved all the hash code to libcfs
+ * - don't allow cur_bits != max_bits without setting of CFS_HASH_REHASH
+ * - ignore hs_rwlock if CFS_HASH_REHASH is not set
+ * - buckets are allocated one by one (instead of as contiguous memory)
+ *   to avoid unnecessary cacheline conflicts
+ *
+ * 2010-03-01: Liang Zhen <zhen.liang@sun.com>
+ * - "bucket" is a group of hlist_heads now; the user can specify bucket
+ *   size via bkt_bits of cfs_hash_create(), and all hlist_heads in a
+ *   bucket share one lock to reduce memory overhead
+ *
+ * - support lockless hash; the caller takes care of locks:
+ *   avoids lock overhead for hash tables that are already protected
+ *   by locking in the caller for another reason
+ *
+ * - support both spin_lock/rwlock for bucket:
+ *   the overhead of spinlock contention is lower than the read/write
+ *   contention of an rwlock, so using a spinlock to serialize operations
+ *   on a bucket is more reasonable for frequently changed hash tables
+ *
+ * - support single-lock mode:
+ *   one lock to protect all hash operations, to avoid the overhead of
+ *   multiple locks if the hash table is always small
+ *
+ * - removed a lot of unnecessary addref & decref on hash elements:
+ *   addref & decref are atomic operations and expensive in many
+ *   use-cases
+ *
+ * - support non-blocking cfs_hash_add() and cfs_hash_findadd():
+ *   some Lustre use-cases require these functions to be strictly
+ *   non-blocking; in those cases a required rehash is scheduled on a
+ *   different thread
+ *
+ * - safer rehash on large hash tables:
+ *   in the old implementation, the rehash function exclusively locked
+ *   the hash table and finished the rehash in one batch, which is
+ *   dangerous on SMP systems because rehashing millions of elements
+ *   can take a long time. The new rehash implementation can release
+ *   the lock and relax the CPU in the middle of a rehash; it is safe
+ *   for another thread to search/change the hash table even while it
+ *   is rehashing
+ *
+ * - support two different refcount modes:
+ *   . the hash table holds a refcount on each element
+ *   . the hash table doesn't change the refcount on adding/removing
+ *     an element
+ *
+ * - support long-name hash tables (for the param-tree)
+ *
+ * - fix a bug in cfs_hash_rehash_key:
+ *   in the old implementation, cfs_hash_rehash_key could corrupt the
+ *   hash table because @key was overwritten without any protection.
+ *   Now the user must define hs_keycpy for rehash-enabled hash tables;
+ *   cfs_hash_rehash_key overwrites the hash key under the lock by
+ *   calling hs_keycpy
+ *
+ * - better hash iteration:
+ *   both locked and lockless iteration of the hash table are now
+ *   supported; the user can also break the iteration by returning 1
+ *   from the callback
+ */ +#include <linux/seq_file.h> +#include <linux/log2.h> + +#include "../../include/linux/libcfs/libcfs.h" + +#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 +static unsigned int warn_on_depth = 8; +module_param(warn_on_depth, uint, 0644); +MODULE_PARM_DESC(warn_on_depth, "warning when hash depth is high."); +#endif + +struct cfs_wi_sched *cfs_sched_rehash; + +static inline void +cfs_hash_nl_lock(union cfs_hash_lock *lock, int exclusive) {} + +static inline void +cfs_hash_nl_unlock(union cfs_hash_lock *lock, int exclusive) {} + +static inline void +cfs_hash_spin_lock(union cfs_hash_lock *lock, int exclusive) + __acquires(&lock->spin) +{ + spin_lock(&lock->spin); +} + +static inline void +cfs_hash_spin_unlock(union cfs_hash_lock *lock, int exclusive) + __releases(&lock->spin) +{ + spin_unlock(&lock->spin); +} + +static inline void +cfs_hash_rw_lock(union cfs_hash_lock *lock, int exclusive) + __acquires(&lock->rw) +{ + if (!exclusive) + read_lock(&lock->rw); + else + write_lock(&lock->rw); +} + +static inline void +cfs_hash_rw_unlock(union cfs_hash_lock *lock, int exclusive) + __releases(&lock->rw) +{ + if (!exclusive) + read_unlock(&lock->rw); + else + write_unlock(&lock->rw); +} + +/** No lock hash */ +static struct cfs_hash_lock_ops cfs_hash_nl_lops = { + .hs_lock = cfs_hash_nl_lock, + .hs_unlock = cfs_hash_nl_unlock, + .hs_bkt_lock = cfs_hash_nl_lock, + .hs_bkt_unlock = cfs_hash_nl_unlock, +}; + +/** no bucket lock, one spinlock to protect everything */ +static struct cfs_hash_lock_ops cfs_hash_nbl_lops = { + .hs_lock = cfs_hash_spin_lock, + .hs_unlock = cfs_hash_spin_unlock, + .hs_bkt_lock = cfs_hash_nl_lock, + .hs_bkt_unlock = cfs_hash_nl_unlock, +}; + +/** spin bucket lock, rehash is enabled */ +static struct cfs_hash_lock_ops cfs_hash_bkt_spin_lops = { + .hs_lock = cfs_hash_rw_lock, + .hs_unlock = cfs_hash_rw_unlock, + .hs_bkt_lock = cfs_hash_spin_lock, + .hs_bkt_unlock = cfs_hash_spin_unlock, +}; + +/** rw bucket lock, rehash is enabled */ +static struct cfs_hash_lock_ops cfs_hash_bkt_rw_lops = { + .hs_lock = cfs_hash_rw_lock, + .hs_unlock = cfs_hash_rw_unlock, + .hs_bkt_lock = cfs_hash_rw_lock, + .hs_bkt_unlock = cfs_hash_rw_unlock, +}; + +/** spin bucket lock, rehash is disabled */ +static struct cfs_hash_lock_ops cfs_hash_nr_bkt_spin_lops = { + .hs_lock = cfs_hash_nl_lock, + .hs_unlock = cfs_hash_nl_unlock, + .hs_bkt_lock = cfs_hash_spin_lock, + .hs_bkt_unlock = cfs_hash_spin_unlock, +}; + +/** rw bucket lock, rehash is disabled */ +static struct cfs_hash_lock_ops cfs_hash_nr_bkt_rw_lops = { + .hs_lock = cfs_hash_nl_lock, + .hs_unlock = cfs_hash_nl_unlock, + .hs_bkt_lock = cfs_hash_rw_lock, + .hs_bkt_unlock = cfs_hash_rw_unlock, +}; + +static void +cfs_hash_lock_setup(struct cfs_hash *hs) +{ + if (cfs_hash_with_no_lock(hs)) { + hs->hs_lops = &cfs_hash_nl_lops; + + } else if (cfs_hash_with_no_bktlock(hs)) { + hs->hs_lops = &cfs_hash_nbl_lops; + spin_lock_init(&hs->hs_lock.spin); + + } else if (cfs_hash_with_rehash(hs)) { + rwlock_init(&hs->hs_lock.rw); + + if (cfs_hash_with_rw_bktlock(hs)) + hs->hs_lops = &cfs_hash_bkt_rw_lops; + else if (cfs_hash_with_spin_bktlock(hs)) + hs->hs_lops = &cfs_hash_bkt_spin_lops; + else + LBUG(); + } else { + if (cfs_hash_with_rw_bktlock(hs)) + hs->hs_lops = &cfs_hash_nr_bkt_rw_lops; + else if (cfs_hash_with_spin_bktlock(hs)) + hs->hs_lops = &cfs_hash_nr_bkt_spin_lops; + else + LBUG(); + } +} + +/** + * Simple hash head without depth tracking + * new element is always added to head of hlist + */ +struct cfs_hash_head { + struct hlist_head hh_head; 
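
cfs_hash_lock_setup() picks one of six static lock-ops tables once at creation, so the hot paths make a single indirect call instead of re-testing feature flags on every operation. A reduced sketch of the same vtable idea with two policies; the flag name and value are illustrative.

#include <pthread.h>
#include <stdio.h>

/* Minimal sketch of the cfs_hash_lock_ops pattern: select a static
 * vtable once at setup so per-operation code never re-tests flags. */
union hash_lock {
        pthread_mutex_t mtx;
};

struct lock_ops {
        void (*lock)(union hash_lock *);
        void (*unlock)(union hash_lock *);
};

static void nl_lock(union hash_lock *l) { (void)l; }   /* lockless mode */
static void nl_unlock(union hash_lock *l) { (void)l; }
static void mtx_lock(union hash_lock *l) { pthread_mutex_lock(&l->mtx); }
static void mtx_unlock(union hash_lock *l) { pthread_mutex_unlock(&l->mtx); }

static const struct lock_ops nl_ops = { nl_lock, nl_unlock };
static const struct lock_ops mtx_ops = { mtx_lock, mtx_unlock };

#define HASH_NO_LOCK 0x1u /* illustrative flag */

static const struct lock_ops *lock_setup(unsigned int flags)
{
        return (flags & HASH_NO_LOCK) ? &nl_ops : &mtx_ops;
}

int main(void)
{
        union hash_lock l = { .mtx = PTHREAD_MUTEX_INITIALIZER };
        const struct lock_ops *ops = lock_setup(0);

        ops->lock(&l);
        puts("locked");
        ops->unlock(&l);
        return 0;
}
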
/**< entries list */ +}; + +static int +cfs_hash_hh_hhead_size(struct cfs_hash *hs) +{ + return sizeof(struct cfs_hash_head); +} + +static struct hlist_head * +cfs_hash_hh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd) +{ + struct cfs_hash_head *head; + + head = (struct cfs_hash_head *)&bd->bd_bucket->hsb_head[0]; + return &head[bd->bd_offset].hh_head; +} + +static int +cfs_hash_hh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnode) +{ + hlist_add_head(hnode, cfs_hash_hh_hhead(hs, bd)); + return -1; /* unknown depth */ +} + +static int +cfs_hash_hh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnode) +{ + hlist_del_init(hnode); + return -1; /* unknown depth */ +} + +/** + * Simple hash head with depth tracking + * new element is always added to head of hlist + */ +struct cfs_hash_head_dep { + struct hlist_head hd_head; /**< entries list */ + unsigned int hd_depth; /**< list length */ +}; + +static int +cfs_hash_hd_hhead_size(struct cfs_hash *hs) +{ + return sizeof(struct cfs_hash_head_dep); +} + +static struct hlist_head * +cfs_hash_hd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd) +{ + struct cfs_hash_head_dep *head; + + head = (struct cfs_hash_head_dep *)&bd->bd_bucket->hsb_head[0]; + return &head[bd->bd_offset].hd_head; +} + +static int +cfs_hash_hd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnode) +{ + struct cfs_hash_head_dep *hh; + + hh = container_of(cfs_hash_hd_hhead(hs, bd), + struct cfs_hash_head_dep, hd_head); + hlist_add_head(hnode, &hh->hd_head); + return ++hh->hd_depth; +} + +static int +cfs_hash_hd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnode) +{ + struct cfs_hash_head_dep *hh; + + hh = container_of(cfs_hash_hd_hhead(hs, bd), + struct cfs_hash_head_dep, hd_head); + hlist_del_init(hnode); + return --hh->hd_depth; +} + +/** + * double links hash head without depth tracking + * new element is always added to tail of hlist + */ +struct cfs_hash_dhead { + struct hlist_head dh_head; /**< entries list */ + struct hlist_node *dh_tail; /**< the last entry */ +}; + +static int +cfs_hash_dh_hhead_size(struct cfs_hash *hs) +{ + return sizeof(struct cfs_hash_dhead); +} + +static struct hlist_head * +cfs_hash_dh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd) +{ + struct cfs_hash_dhead *head; + + head = (struct cfs_hash_dhead *)&bd->bd_bucket->hsb_head[0]; + return &head[bd->bd_offset].dh_head; +} + +static int +cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnode) +{ + struct cfs_hash_dhead *dh; + + dh = container_of(cfs_hash_dh_hhead(hs, bd), + struct cfs_hash_dhead, dh_head); + if (dh->dh_tail) /* not empty */ + hlist_add_behind(hnode, dh->dh_tail); + else /* empty list */ + hlist_add_head(hnode, &dh->dh_head); + dh->dh_tail = hnode; + return -1; /* unknown depth */ +} + +static int +cfs_hash_dh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnd) +{ + struct cfs_hash_dhead *dh; + + dh = container_of(cfs_hash_dh_hhead(hs, bd), + struct cfs_hash_dhead, dh_head); + if (!hnd->next) { /* it's the tail */ + dh->dh_tail = (hnd->pprev == &dh->dh_head.first) ? 
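
The *_dep head variants differ from the plain ones only in that add/del return the updated chain length, which cfs_hash_bd_dep_record() later uses to track the deepest chain. A standalone sketch of that contract:

#include <stdio.h>

/* "Head with depth" sketch: add/del return the new chain length so
 * callers can record the maximum depth per bucket. */
struct node {
        struct node *next;
};

struct head_dep {
        struct node *head;
        unsigned int depth;
};

static int add_head(struct head_dep *h, struct node *n)
{
        n->next = h->head;
        h->head = n;
        return ++h->depth; /* new depth, as cfs_hash_hd_hnode_add() */
}

static int del_head(struct head_dep *h)
{
        if (!h->head)
                return -1;
        h->head = h->head->next;
        return --h->depth;
}

int main(void)
{
        struct head_dep h = { NULL, 0 };
        struct node a, b;

        printf("%d ", add_head(&h, &a)); /* 1 */
        printf("%d ", add_head(&h, &b)); /* 2 */
        printf("%d\n", del_head(&h));    /* 1 */
        return 0;
}
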
NULL : + container_of(hnd->pprev, struct hlist_node, next); + } + hlist_del_init(hnd); + return -1; /* unknown depth */ +} + +/** + * double links hash head with depth tracking + * new element is always added to tail of hlist + */ +struct cfs_hash_dhead_dep { + struct hlist_head dd_head; /**< entries list */ + struct hlist_node *dd_tail; /**< the last entry */ + unsigned int dd_depth; /**< list length */ +}; + +static int +cfs_hash_dd_hhead_size(struct cfs_hash *hs) +{ + return sizeof(struct cfs_hash_dhead_dep); +} + +static struct hlist_head * +cfs_hash_dd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd) +{ + struct cfs_hash_dhead_dep *head; + + head = (struct cfs_hash_dhead_dep *)&bd->bd_bucket->hsb_head[0]; + return &head[bd->bd_offset].dd_head; +} + +static int +cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnode) +{ + struct cfs_hash_dhead_dep *dh; + + dh = container_of(cfs_hash_dd_hhead(hs, bd), + struct cfs_hash_dhead_dep, dd_head); + if (dh->dd_tail) /* not empty */ + hlist_add_behind(hnode, dh->dd_tail); + else /* empty list */ + hlist_add_head(hnode, &dh->dd_head); + dh->dd_tail = hnode; + return ++dh->dd_depth; +} + +static int +cfs_hash_dd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnd) +{ + struct cfs_hash_dhead_dep *dh; + + dh = container_of(cfs_hash_dd_hhead(hs, bd), + struct cfs_hash_dhead_dep, dd_head); + if (!hnd->next) { /* it's the tail */ + dh->dd_tail = (hnd->pprev == &dh->dd_head.first) ? NULL : + container_of(hnd->pprev, struct hlist_node, next); + } + hlist_del_init(hnd); + return --dh->dd_depth; +} + +static struct cfs_hash_hlist_ops cfs_hash_hh_hops = { + .hop_hhead = cfs_hash_hh_hhead, + .hop_hhead_size = cfs_hash_hh_hhead_size, + .hop_hnode_add = cfs_hash_hh_hnode_add, + .hop_hnode_del = cfs_hash_hh_hnode_del, +}; + +static struct cfs_hash_hlist_ops cfs_hash_hd_hops = { + .hop_hhead = cfs_hash_hd_hhead, + .hop_hhead_size = cfs_hash_hd_hhead_size, + .hop_hnode_add = cfs_hash_hd_hnode_add, + .hop_hnode_del = cfs_hash_hd_hnode_del, +}; + +static struct cfs_hash_hlist_ops cfs_hash_dh_hops = { + .hop_hhead = cfs_hash_dh_hhead, + .hop_hhead_size = cfs_hash_dh_hhead_size, + .hop_hnode_add = cfs_hash_dh_hnode_add, + .hop_hnode_del = cfs_hash_dh_hnode_del, +}; + +static struct cfs_hash_hlist_ops cfs_hash_dd_hops = { + .hop_hhead = cfs_hash_dd_hhead, + .hop_hhead_size = cfs_hash_dd_hhead_size, + .hop_hnode_add = cfs_hash_dd_hnode_add, + .hop_hnode_del = cfs_hash_dd_hnode_del, +}; + +static void +cfs_hash_hlist_setup(struct cfs_hash *hs) +{ + if (cfs_hash_with_add_tail(hs)) { + hs->hs_hops = cfs_hash_with_depth(hs) ? + &cfs_hash_dd_hops : &cfs_hash_dh_hops; + } else { + hs->hs_hops = cfs_hash_with_depth(hs) ? 
+ &cfs_hash_hd_hops : &cfs_hash_hh_hops; + } +} + +static void +cfs_hash_bd_from_key(struct cfs_hash *hs, struct cfs_hash_bucket **bkts, + unsigned int bits, const void *key, struct cfs_hash_bd *bd) +{ + unsigned int index = cfs_hash_id(hs, key, (1U << bits) - 1); + + LASSERT(bits == hs->hs_cur_bits || bits == hs->hs_rehash_bits); + + bd->bd_bucket = bkts[index & ((1U << (bits - hs->hs_bkt_bits)) - 1)]; + bd->bd_offset = index >> (bits - hs->hs_bkt_bits); +} + +void +cfs_hash_bd_get(struct cfs_hash *hs, const void *key, struct cfs_hash_bd *bd) +{ + /* NB: caller should hold hs->hs_rwlock if REHASH is set */ + if (likely(!hs->hs_rehash_buckets)) { + cfs_hash_bd_from_key(hs, hs->hs_buckets, + hs->hs_cur_bits, key, bd); + } else { + LASSERT(hs->hs_rehash_bits != 0); + cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets, + hs->hs_rehash_bits, key, bd); + } +} +EXPORT_SYMBOL(cfs_hash_bd_get); + +static inline void +cfs_hash_bd_dep_record(struct cfs_hash *hs, struct cfs_hash_bd *bd, int dep_cur) +{ + if (likely(dep_cur <= bd->bd_bucket->hsb_depmax)) + return; + + bd->bd_bucket->hsb_depmax = dep_cur; +# if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 + if (likely(warn_on_depth == 0 || + max(warn_on_depth, hs->hs_dep_max) >= dep_cur)) + return; + + spin_lock(&hs->hs_dep_lock); + hs->hs_dep_max = dep_cur; + hs->hs_dep_bkt = bd->bd_bucket->hsb_index; + hs->hs_dep_off = bd->bd_offset; + hs->hs_dep_bits = hs->hs_cur_bits; + spin_unlock(&hs->hs_dep_lock); + + cfs_wi_schedule(cfs_sched_rehash, &hs->hs_dep_wi); +# endif +} + +void +cfs_hash_bd_add_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnode) +{ + int rc; + + rc = hs->hs_hops->hop_hnode_add(hs, bd, hnode); + cfs_hash_bd_dep_record(hs, bd, rc); + bd->bd_bucket->hsb_version++; + if (unlikely(bd->bd_bucket->hsb_version == 0)) + bd->bd_bucket->hsb_version++; + bd->bd_bucket->hsb_count++; + + if (cfs_hash_with_counter(hs)) + atomic_inc(&hs->hs_count); + if (!cfs_hash_with_no_itemref(hs)) + cfs_hash_get(hs, hnode); +} +EXPORT_SYMBOL(cfs_hash_bd_add_locked); + +void +cfs_hash_bd_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnode) +{ + hs->hs_hops->hop_hnode_del(hs, bd, hnode); + + LASSERT(bd->bd_bucket->hsb_count > 0); + bd->bd_bucket->hsb_count--; + bd->bd_bucket->hsb_version++; + if (unlikely(bd->bd_bucket->hsb_version == 0)) + bd->bd_bucket->hsb_version++; + + if (cfs_hash_with_counter(hs)) { + LASSERT(atomic_read(&hs->hs_count) > 0); + atomic_dec(&hs->hs_count); + } + if (!cfs_hash_with_no_itemref(hs)) + cfs_hash_put_locked(hs, hnode); +} +EXPORT_SYMBOL(cfs_hash_bd_del_locked); + +void +cfs_hash_bd_move_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd_old, + struct cfs_hash_bd *bd_new, struct hlist_node *hnode) +{ + struct cfs_hash_bucket *obkt = bd_old->bd_bucket; + struct cfs_hash_bucket *nbkt = bd_new->bd_bucket; + int rc; + + if (cfs_hash_bd_compare(bd_old, bd_new) == 0) + return; + + /* use cfs_hash_bd_hnode_add/del, to avoid atomic & refcount ops + * in cfs_hash_bd_del/add_locked + */ + hs->hs_hops->hop_hnode_del(hs, bd_old, hnode); + rc = hs->hs_hops->hop_hnode_add(hs, bd_new, hnode); + cfs_hash_bd_dep_record(hs, bd_new, rc); + + LASSERT(obkt->hsb_count > 0); + obkt->hsb_count--; + obkt->hsb_version++; + if (unlikely(obkt->hsb_version == 0)) + obkt->hsb_version++; + nbkt->hsb_count++; + nbkt->hsb_version++; + if (unlikely(nbkt->hsb_version == 0)) + nbkt->hsb_version++; +} + +enum { + /** always set, for sanity (avoid ZERO intent) */ + CFS_HS_LOOKUP_MASK_FIND = BIT(0), + /** return entry with 
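
cfs_hash_bd_from_key() masks the hash down to the table's current bit width, then splits the index: the low (bits - bkt_bits) bits choose the bucket and the high bits choose the hlist head inside it, so each bucket serves 2^bkt_bits chains under one lock. The arithmetic, standalone:

#include <stdio.h>

/* The index split used by cfs_hash_bd_from_key() above. */
struct bd {
        unsigned int bucket;
        unsigned int offset;
};

static struct bd bd_from_hash(unsigned int hash, unsigned int bits,
                              unsigned int bkt_bits)
{
        unsigned int index = hash & ((1u << bits) - 1);
        struct bd bd;

        bd.bucket = index & ((1u << (bits - bkt_bits)) - 1);
        bd.offset = index >> (bits - bkt_bits);
        return bd;
}

int main(void)
{
        /* 10-bit table, 3-bit buckets: 128 buckets x 8 chains each */
        struct bd bd = bd_from_hash(0x2a7, 10, 3);

        printf("bucket=%u offset=%u\n", bd.bucket, bd.offset);
        return 0;
}
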
a ref */ + CFS_HS_LOOKUP_MASK_REF = BIT(1), + /** add entry if not existing */ + CFS_HS_LOOKUP_MASK_ADD = BIT(2), + /** delete entry, ignore other masks */ + CFS_HS_LOOKUP_MASK_DEL = BIT(3), +}; + +enum cfs_hash_lookup_intent { + /** return item w/o refcount */ + CFS_HS_LOOKUP_IT_PEEK = CFS_HS_LOOKUP_MASK_FIND, + /** return item with refcount */ + CFS_HS_LOOKUP_IT_FIND = (CFS_HS_LOOKUP_MASK_FIND | + CFS_HS_LOOKUP_MASK_REF), + /** return item w/o refcount if existed, otherwise add */ + CFS_HS_LOOKUP_IT_ADD = (CFS_HS_LOOKUP_MASK_FIND | + CFS_HS_LOOKUP_MASK_ADD), + /** return item with refcount if existed, otherwise add */ + CFS_HS_LOOKUP_IT_FINDADD = (CFS_HS_LOOKUP_IT_FIND | + CFS_HS_LOOKUP_MASK_ADD), + /** delete if existed */ + CFS_HS_LOOKUP_IT_FINDDEL = (CFS_HS_LOOKUP_MASK_FIND | + CFS_HS_LOOKUP_MASK_DEL) +}; + +static struct hlist_node * +cfs_hash_bd_lookup_intent(struct cfs_hash *hs, struct cfs_hash_bd *bd, + const void *key, struct hlist_node *hnode, + enum cfs_hash_lookup_intent intent) + +{ + struct hlist_head *hhead = cfs_hash_bd_hhead(hs, bd); + struct hlist_node *ehnode; + struct hlist_node *match; + int intent_add = (intent & CFS_HS_LOOKUP_MASK_ADD) != 0; + + /* with this function, we can avoid a lot of useless refcount ops, + * which are expensive atomic operations most time. + */ + match = intent_add ? NULL : hnode; + hlist_for_each(ehnode, hhead) { + if (!cfs_hash_keycmp(hs, key, ehnode)) + continue; + + if (match && match != ehnode) /* can't match */ + continue; + + /* match and ... */ + if ((intent & CFS_HS_LOOKUP_MASK_DEL) != 0) { + cfs_hash_bd_del_locked(hs, bd, ehnode); + return ehnode; + } + + /* caller wants refcount? */ + if ((intent & CFS_HS_LOOKUP_MASK_REF) != 0) + cfs_hash_get(hs, ehnode); + return ehnode; + } + /* no match item */ + if (!intent_add) + return NULL; + + LASSERT(hnode); + cfs_hash_bd_add_locked(hs, bd, hnode); + return hnode; +} + +struct hlist_node * +cfs_hash_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, + const void *key) +{ + return cfs_hash_bd_lookup_intent(hs, bd, key, NULL, + CFS_HS_LOOKUP_IT_FIND); +} +EXPORT_SYMBOL(cfs_hash_bd_lookup_locked); + +struct hlist_node * +cfs_hash_bd_peek_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, + const void *key) +{ + return cfs_hash_bd_lookup_intent(hs, bd, key, NULL, + CFS_HS_LOOKUP_IT_PEEK); +} +EXPORT_SYMBOL(cfs_hash_bd_peek_locked); + +static void +cfs_hash_multi_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds, + unsigned n, int excl) +{ + struct cfs_hash_bucket *prev = NULL; + int i; + + /** + * bds must be ascendantly ordered by bd->bd_bucket->hsb_index. + * NB: it's possible that several bds point to the same bucket but + * have different bd::bd_offset, so need take care of deadlock. 
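
The lookup intents above are deliberately composable bitmasks: every intent contains FIND, and the REF/ADD/DEL bits modify what happens on a hit (or a miss, for ADD), which is how a single cfs_hash_bd_lookup_intent() serves peek, find, findadd and finddel. Restated standalone:

#include <stdio.h>

enum {
        IT_MASK_FIND = 1 << 0,
        IT_MASK_REF  = 1 << 1,
        IT_MASK_ADD  = 1 << 2,
        IT_MASK_DEL  = 1 << 3,
};

enum intent {
        IT_PEEK    = IT_MASK_FIND,
        IT_FIND    = IT_MASK_FIND | IT_MASK_REF,
        IT_ADD     = IT_MASK_FIND | IT_MASK_ADD,
        IT_FINDADD = IT_FIND | IT_MASK_ADD,
        IT_FINDDEL = IT_MASK_FIND | IT_MASK_DEL,
};

int main(void)
{
        enum intent it = IT_FINDADD;

        printf("takes ref on match: %d, adds on miss: %d\n",
               !!(it & IT_MASK_REF), !!(it & IT_MASK_ADD));
        return 0;
}
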
+ */ + cfs_hash_for_each_bd(bds, n, i) { + if (prev == bds[i].bd_bucket) + continue; + + LASSERT(!prev || prev->hsb_index < bds[i].bd_bucket->hsb_index); + cfs_hash_bd_lock(hs, &bds[i], excl); + prev = bds[i].bd_bucket; + } +} + +static void +cfs_hash_multi_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds, + unsigned n, int excl) +{ + struct cfs_hash_bucket *prev = NULL; + int i; + + cfs_hash_for_each_bd(bds, n, i) { + if (prev != bds[i].bd_bucket) { + cfs_hash_bd_unlock(hs, &bds[i], excl); + prev = bds[i].bd_bucket; + } + } +} + +static struct hlist_node * +cfs_hash_multi_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, + unsigned n, const void *key) +{ + struct hlist_node *ehnode; + unsigned i; + + cfs_hash_for_each_bd(bds, n, i) { + ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, NULL, + CFS_HS_LOOKUP_IT_FIND); + if (ehnode) + return ehnode; + } + return NULL; +} + +static struct hlist_node * +cfs_hash_multi_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, + unsigned n, const void *key, + struct hlist_node *hnode, int noref) +{ + struct hlist_node *ehnode; + int intent; + unsigned i; + + LASSERT(hnode); + intent = (!noref * CFS_HS_LOOKUP_MASK_REF) | CFS_HS_LOOKUP_IT_PEEK; + + cfs_hash_for_each_bd(bds, n, i) { + ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, + NULL, intent); + if (ehnode) + return ehnode; + } + + if (i == 1) { /* only one bucket */ + cfs_hash_bd_add_locked(hs, &bds[0], hnode); + } else { + struct cfs_hash_bd mybd; + + cfs_hash_bd_get(hs, key, &mybd); + cfs_hash_bd_add_locked(hs, &mybd, hnode); + } + + return hnode; +} + +static struct hlist_node * +cfs_hash_multi_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, + unsigned n, const void *key, + struct hlist_node *hnode) +{ + struct hlist_node *ehnode; + unsigned int i; + + cfs_hash_for_each_bd(bds, n, i) { + ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, hnode, + CFS_HS_LOOKUP_IT_FINDDEL); + if (ehnode) + return ehnode; + } + return NULL; +} + +static void +cfs_hash_bd_order(struct cfs_hash_bd *bd1, struct cfs_hash_bd *bd2) +{ + int rc; + + if (!bd2->bd_bucket) + return; + + if (!bd1->bd_bucket) { + *bd1 = *bd2; + bd2->bd_bucket = NULL; + return; + } + + rc = cfs_hash_bd_compare(bd1, bd2); + if (!rc) + bd2->bd_bucket = NULL; + else if (rc > 0) + swap(*bd1, *bd2); /* swap bd1 and bd2 */ +} + +void +cfs_hash_dual_bd_get(struct cfs_hash *hs, const void *key, + struct cfs_hash_bd *bds) +{ + /* NB: caller should hold hs_lock.rw if REHASH is set */ + cfs_hash_bd_from_key(hs, hs->hs_buckets, + hs->hs_cur_bits, key, &bds[0]); + if (likely(!hs->hs_rehash_buckets)) { + /* no rehash or not rehashing */ + bds[1].bd_bucket = NULL; + return; + } + + LASSERT(hs->hs_rehash_bits != 0); + cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets, + hs->hs_rehash_bits, key, &bds[1]); + + cfs_hash_bd_order(&bds[0], &bds[1]); +} + +void +cfs_hash_dual_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl) +{ + cfs_hash_multi_bd_lock(hs, bds, 2, excl); +} + +void +cfs_hash_dual_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl) +{ + cfs_hash_multi_bd_unlock(hs, bds, 2, excl); +} + +struct hlist_node * +cfs_hash_dual_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, + const void *key) +{ + return cfs_hash_multi_bd_lookup_locked(hs, bds, 2, key); +} + +struct hlist_node * +cfs_hash_dual_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, + const void *key, struct hlist_node *hnode, + int noref) +{ + return cfs_hash_multi_bd_findadd_locked(hs, 
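
cfs_hash_multi_bd_lock() relies on the bds being sorted by bucket index: locks are taken in ascending order (a global order prevents deadlock) and a bucket shared by two bds is taken only once. A pthread sketch of the same discipline:

#include <pthread.h>
#include <stdio.h>

struct bucket {
        unsigned int index;
        pthread_mutex_t lock;
};

/* Lock buckets in ascending index order, skipping a bucket that
 * appears twice in the (pre-sorted) list, as the kernel code does. */
static void multi_lock(struct bucket **bkts, int n)
{
        struct bucket *prev = NULL;
        int i;

        for (i = 0; i < n; i++) {
                if (bkts[i] == prev)
                        continue; /* same bucket, already held */
                pthread_mutex_lock(&bkts[i]->lock);
                prev = bkts[i];
        }
}

static void multi_unlock(struct bucket **bkts, int n)
{
        struct bucket *prev = NULL;
        int i;

        for (i = 0; i < n; i++) {
                if (bkts[i] != prev) {
                        pthread_mutex_unlock(&bkts[i]->lock);
                        prev = bkts[i];
                }
        }
}

int main(void)
{
        struct bucket a = { 0, PTHREAD_MUTEX_INITIALIZER };
        struct bucket b = { 1, PTHREAD_MUTEX_INITIALIZER };
        struct bucket *bds[2] = { &a, &b }; /* pre-sorted by index */

        multi_lock(bds, 2);
        puts("both locked");
        multi_unlock(bds, 2);
        return 0;
}
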
bds, 2, key, + hnode, noref); +} + +struct hlist_node * +cfs_hash_dual_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, + const void *key, struct hlist_node *hnode) +{ + return cfs_hash_multi_bd_finddel_locked(hs, bds, 2, key, hnode); +} + +static void +cfs_hash_buckets_free(struct cfs_hash_bucket **buckets, + int bkt_size, int prev_size, int size) +{ + int i; + + for (i = prev_size; i < size; i++) { + if (buckets[i]) + LIBCFS_FREE(buckets[i], bkt_size); + } + + LIBCFS_FREE(buckets, sizeof(buckets[0]) * size); +} + +/* + * Create or grow bucket memory. Return old_buckets if no allocation was + * needed, the newly allocated buckets if allocation was needed and + * successful, and NULL on error. + */ +static struct cfs_hash_bucket ** +cfs_hash_buckets_realloc(struct cfs_hash *hs, struct cfs_hash_bucket **old_bkts, + unsigned int old_size, unsigned int new_size) +{ + struct cfs_hash_bucket **new_bkts; + int i; + + LASSERT(old_size == 0 || old_bkts); + + if (old_bkts && old_size == new_size) + return old_bkts; + + LIBCFS_ALLOC(new_bkts, sizeof(new_bkts[0]) * new_size); + if (!new_bkts) + return NULL; + + if (old_bkts) { + memcpy(new_bkts, old_bkts, + min(old_size, new_size) * sizeof(*old_bkts)); + } + + for (i = old_size; i < new_size; i++) { + struct hlist_head *hhead; + struct cfs_hash_bd bd; + + LIBCFS_ALLOC(new_bkts[i], cfs_hash_bkt_size(hs)); + if (!new_bkts[i]) { + cfs_hash_buckets_free(new_bkts, cfs_hash_bkt_size(hs), + old_size, new_size); + return NULL; + } + + new_bkts[i]->hsb_index = i; + new_bkts[i]->hsb_version = 1; /* shouldn't be zero */ + new_bkts[i]->hsb_depmax = -1; /* unknown */ + bd.bd_bucket = new_bkts[i]; + cfs_hash_bd_for_each_hlist(hs, &bd, hhead) + INIT_HLIST_HEAD(hhead); + + if (cfs_hash_with_no_lock(hs) || + cfs_hash_with_no_bktlock(hs)) + continue; + + if (cfs_hash_with_rw_bktlock(hs)) + rwlock_init(&new_bkts[i]->hsb_lock.rw); + else if (cfs_hash_with_spin_bktlock(hs)) + spin_lock_init(&new_bkts[i]->hsb_lock.spin); + else + LBUG(); /* invalid use-case */ + } + return new_bkts; +} + +/** + * Initialize new libcfs hash, where: + * @name - Descriptive hash name + * @cur_bits - Initial hash table size, in bits + * @max_bits - Maximum allowed hash table resize, in bits + * @ops - Registered hash table operations + * @flags - CFS_HASH_REHASH enable synamic hash resizing + * - CFS_HASH_SORT enable chained hash sort + */ +static int cfs_hash_rehash_worker(cfs_workitem_t *wi); + +#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 +static int cfs_hash_dep_print(cfs_workitem_t *wi) +{ + struct cfs_hash *hs = container_of(wi, struct cfs_hash, hs_dep_wi); + int dep; + int bkt; + int off; + int bits; + + spin_lock(&hs->hs_dep_lock); + dep = hs->hs_dep_max; + bkt = hs->hs_dep_bkt; + off = hs->hs_dep_off; + bits = hs->hs_dep_bits; + spin_unlock(&hs->hs_dep_lock); + + LCONSOLE_WARN("#### HASH %s (bits: %d): max depth %d at bucket %d/%d\n", + hs->hs_name, bits, dep, bkt, off); + spin_lock(&hs->hs_dep_lock); + hs->hs_dep_bits = 0; /* mark as workitem done */ + spin_unlock(&hs->hs_dep_lock); + return 0; +} + +static void cfs_hash_depth_wi_init(struct cfs_hash *hs) +{ + spin_lock_init(&hs->hs_dep_lock); + cfs_wi_init(&hs->hs_dep_wi, hs, cfs_hash_dep_print); +} + +static void cfs_hash_depth_wi_cancel(struct cfs_hash *hs) +{ + if (cfs_wi_deschedule(cfs_sched_rehash, &hs->hs_dep_wi)) + return; + + spin_lock(&hs->hs_dep_lock); + while (hs->hs_dep_bits != 0) { + spin_unlock(&hs->hs_dep_lock); + cond_resched(); + spin_lock(&hs->hs_dep_lock); + } + spin_unlock(&hs->hs_dep_lock); +} + 
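
cfs_hash_buckets_realloc() grows the table without touching the old buckets: copy the old pointer array, allocate only the new tail, and on failure free just that tail so the caller's original table survives. A compact userspace analogue:

#include <stdlib.h>
#include <string.h>

struct bucket { unsigned int index; };

/* Grow-and-copy in the style of cfs_hash_buckets_realloc(): on
 * failure, unwind only the newly allocated tail. */
static struct bucket **grow(struct bucket **old, unsigned int old_n,
                            unsigned int new_n)
{
        struct bucket **bkts;
        unsigned int i;

        bkts = calloc(new_n, sizeof(*bkts));
        if (!bkts)
                return NULL;
        if (old)
                memcpy(bkts, old, old_n * sizeof(*old));

        for (i = old_n; i < new_n; i++) {
                bkts[i] = malloc(sizeof(*bkts[i]));
                if (!bkts[i]) {
                        while (i-- > old_n) /* free only the new tail */
                                free(bkts[i]);
                        free(bkts);
                        return NULL;
                }
                bkts[i]->index = i;
        }
        return bkts;
}

int main(void)
{
        struct bucket **b = grow(NULL, 0, 8);

        return b ? 0 : 1;
}
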
+#else /* CFS_HASH_DEBUG_LEVEL < CFS_HASH_DEBUG_1 */ + +static inline void cfs_hash_depth_wi_init(struct cfs_hash *hs) {} +static inline void cfs_hash_depth_wi_cancel(struct cfs_hash *hs) {} + +#endif /* CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 */ + +struct cfs_hash * +cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits, + unsigned bkt_bits, unsigned extra_bytes, + unsigned min_theta, unsigned max_theta, + struct cfs_hash_ops *ops, unsigned flags) +{ + struct cfs_hash *hs; + int len; + + CLASSERT(CFS_HASH_THETA_BITS < 15); + + LASSERT(name); + LASSERT(ops->hs_key); + LASSERT(ops->hs_hash); + LASSERT(ops->hs_object); + LASSERT(ops->hs_keycmp); + LASSERT(ops->hs_get); + LASSERT(ops->hs_put_locked); + + if ((flags & CFS_HASH_REHASH) != 0) + flags |= CFS_HASH_COUNTER; /* must have counter */ + + LASSERT(cur_bits > 0); + LASSERT(cur_bits >= bkt_bits); + LASSERT(max_bits >= cur_bits && max_bits < 31); + LASSERT(ergo((flags & CFS_HASH_REHASH) == 0, cur_bits == max_bits)); + LASSERT(ergo((flags & CFS_HASH_REHASH) != 0, + (flags & CFS_HASH_NO_LOCK) == 0)); + LASSERT(ergo((flags & CFS_HASH_REHASH_KEY) != 0, ops->hs_keycpy)); + + len = (flags & CFS_HASH_BIGNAME) == 0 ? + CFS_HASH_NAME_LEN : CFS_HASH_BIGNAME_LEN; + LIBCFS_ALLOC(hs, offsetof(struct cfs_hash, hs_name[len])); + if (!hs) + return NULL; + + strlcpy(hs->hs_name, name, len); + hs->hs_flags = flags; + + atomic_set(&hs->hs_refcount, 1); + atomic_set(&hs->hs_count, 0); + + cfs_hash_lock_setup(hs); + cfs_hash_hlist_setup(hs); + + hs->hs_cur_bits = (__u8)cur_bits; + hs->hs_min_bits = (__u8)cur_bits; + hs->hs_max_bits = (__u8)max_bits; + hs->hs_bkt_bits = (__u8)bkt_bits; + + hs->hs_ops = ops; + hs->hs_extra_bytes = extra_bytes; + hs->hs_rehash_bits = 0; + cfs_wi_init(&hs->hs_rehash_wi, hs, cfs_hash_rehash_worker); + cfs_hash_depth_wi_init(hs); + + if (cfs_hash_with_rehash(hs)) + __cfs_hash_set_theta(hs, min_theta, max_theta); + + hs->hs_buckets = cfs_hash_buckets_realloc(hs, NULL, 0, + CFS_HASH_NBKT(hs)); + if (hs->hs_buckets) + return hs; + + LIBCFS_FREE(hs, offsetof(struct cfs_hash, hs_name[len])); + return NULL; +} +EXPORT_SYMBOL(cfs_hash_create); + +/** + * Cleanup libcfs hash @hs. + */ +static void +cfs_hash_destroy(struct cfs_hash *hs) +{ + struct hlist_node *hnode; + struct hlist_node *pos; + struct cfs_hash_bd bd; + int i; + + LASSERT(hs); + LASSERT(!cfs_hash_is_exiting(hs) && + !cfs_hash_is_iterating(hs)); + + /** + * prohibit further rehashes, don't need any lock because + * I'm the only (last) one can change it. 
+ */ + hs->hs_exiting = 1; + if (cfs_hash_with_rehash(hs)) + cfs_hash_rehash_cancel(hs); + + cfs_hash_depth_wi_cancel(hs); + /* rehash should be done/canceled */ + LASSERT(hs->hs_buckets && !hs->hs_rehash_buckets); + + cfs_hash_for_each_bucket(hs, &bd, i) { + struct hlist_head *hhead; + + LASSERT(bd.bd_bucket); + /* no need to take this lock, just for consistent code */ + cfs_hash_bd_lock(hs, &bd, 1); + + cfs_hash_bd_for_each_hlist(hs, &bd, hhead) { + hlist_for_each_safe(hnode, pos, hhead) { + LASSERTF(!cfs_hash_with_assert_empty(hs), + "hash %s bucket %u(%u) is not empty: %u items left\n", + hs->hs_name, bd.bd_bucket->hsb_index, + bd.bd_offset, bd.bd_bucket->hsb_count); + /* can't assert key valicate, because we + * can interrupt rehash + */ + cfs_hash_bd_del_locked(hs, &bd, hnode); + cfs_hash_exit(hs, hnode); + } + } + LASSERT(bd.bd_bucket->hsb_count == 0); + cfs_hash_bd_unlock(hs, &bd, 1); + cond_resched(); + } + + LASSERT(atomic_read(&hs->hs_count) == 0); + + cfs_hash_buckets_free(hs->hs_buckets, cfs_hash_bkt_size(hs), + 0, CFS_HASH_NBKT(hs)); + i = cfs_hash_with_bigname(hs) ? + CFS_HASH_BIGNAME_LEN : CFS_HASH_NAME_LEN; + LIBCFS_FREE(hs, offsetof(struct cfs_hash, hs_name[i])); +} + +struct cfs_hash *cfs_hash_getref(struct cfs_hash *hs) +{ + if (atomic_inc_not_zero(&hs->hs_refcount)) + return hs; + return NULL; +} +EXPORT_SYMBOL(cfs_hash_getref); + +void cfs_hash_putref(struct cfs_hash *hs) +{ + if (atomic_dec_and_test(&hs->hs_refcount)) + cfs_hash_destroy(hs); +} +EXPORT_SYMBOL(cfs_hash_putref); + +static inline int +cfs_hash_rehash_bits(struct cfs_hash *hs) +{ + if (cfs_hash_with_no_lock(hs) || + !cfs_hash_with_rehash(hs)) + return -EOPNOTSUPP; + + if (unlikely(cfs_hash_is_exiting(hs))) + return -ESRCH; + + if (unlikely(cfs_hash_is_rehashing(hs))) + return -EALREADY; + + if (unlikely(cfs_hash_is_iterating(hs))) + return -EAGAIN; + + /* XXX: need to handle case with max_theta != 2.0 + * and the case with min_theta != 0.5 + */ + if ((hs->hs_cur_bits < hs->hs_max_bits) && + (__cfs_hash_theta(hs) > hs->hs_max_theta)) + return hs->hs_cur_bits + 1; + + if (!cfs_hash_with_shrink(hs)) + return 0; + + if ((hs->hs_cur_bits > hs->hs_min_bits) && + (__cfs_hash_theta(hs) < hs->hs_min_theta)) + return hs->hs_cur_bits - 1; + + return 0; +} + +/** + * don't allow inline rehash if: + * - user wants non-blocking change (add/del) on hash table + * - too many elements + */ +static inline int +cfs_hash_rehash_inline(struct cfs_hash *hs) +{ + return !cfs_hash_with_nblk_change(hs) && + atomic_read(&hs->hs_count) < CFS_HASH_LOOP_HOG; +} + +/** + * Add item @hnode to libcfs hash @hs using @key. The registered + * ops->hs_get function will be called when the item is added. 
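
cfs_hash_rehash_bits() turns the load factor into a resize decision: grow one bit when theta exceeds max_theta, shrink one bit when it drops below min_theta, all within the [min_bits, max_bits] clamp. The sketch below assumes the usual fixed-point definition theta = (count << THETA_BITS) >> bits, which is not shown in this hunk; the constants are illustrative.

#include <stdio.h>

#define THETA_BITS 10 /* fixed-point fraction bits (assumed) */

/* Grow/shrink decision in the style of cfs_hash_rehash_bits(). */
static int rehash_bits(unsigned int count, unsigned int bits,
                       unsigned int min_bits, unsigned int max_bits,
                       unsigned int min_theta, unsigned int max_theta)
{
        unsigned int theta = (count << THETA_BITS) >> bits;

        if (bits < max_bits && theta > max_theta)
                return bits + 1;
        if (bits > min_bits && theta < min_theta)
                return bits - 1;
        return 0; /* no resize needed */
}

int main(void)
{
        /* 3000 items in a 2^10 table: theta ~2.93 > 2.0 -> grow to 11 */
        printf("%d\n", rehash_bits(3000, 10, 5, 20,
                                   1 << (THETA_BITS - 1),
                                   2 << THETA_BITS));
        return 0;
}
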
+ */ +void +cfs_hash_add(struct cfs_hash *hs, const void *key, struct hlist_node *hnode) +{ + struct cfs_hash_bd bd; + int bits; + + LASSERT(hlist_unhashed(hnode)); + + cfs_hash_lock(hs, 0); + cfs_hash_bd_get_and_lock(hs, key, &bd, 1); + + cfs_hash_key_validate(hs, key, hnode); + cfs_hash_bd_add_locked(hs, &bd, hnode); + + cfs_hash_bd_unlock(hs, &bd, 1); + + bits = cfs_hash_rehash_bits(hs); + cfs_hash_unlock(hs, 0); + if (bits > 0) + cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs)); +} +EXPORT_SYMBOL(cfs_hash_add); + +static struct hlist_node * +cfs_hash_find_or_add(struct cfs_hash *hs, const void *key, + struct hlist_node *hnode, int noref) +{ + struct hlist_node *ehnode; + struct cfs_hash_bd bds[2]; + int bits = 0; + + LASSERT(hlist_unhashed(hnode)); + + cfs_hash_lock(hs, 0); + cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1); + + cfs_hash_key_validate(hs, key, hnode); + ehnode = cfs_hash_dual_bd_findadd_locked(hs, bds, key, + hnode, noref); + cfs_hash_dual_bd_unlock(hs, bds, 1); + + if (ehnode == hnode) /* new item added */ + bits = cfs_hash_rehash_bits(hs); + cfs_hash_unlock(hs, 0); + if (bits > 0) + cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs)); + + return ehnode; +} + +/** + * Add item @hnode to libcfs hash @hs using @key. The registered + * ops->hs_get function will be called if the item was added. + * Returns 0 on success or -EALREADY on key collisions. + */ +int +cfs_hash_add_unique(struct cfs_hash *hs, const void *key, + struct hlist_node *hnode) +{ + return cfs_hash_find_or_add(hs, key, hnode, 1) != hnode ? + -EALREADY : 0; +} +EXPORT_SYMBOL(cfs_hash_add_unique); + +/** + * Add item @hnode to libcfs hash @hs using @key. If this @key + * already exists in the hash then ops->hs_get will be called on the + * conflicting entry and that entry will be returned to the caller. + * Otherwise ops->hs_get is called on the item which was added. + */ +void * +cfs_hash_findadd_unique(struct cfs_hash *hs, const void *key, + struct hlist_node *hnode) +{ + hnode = cfs_hash_find_or_add(hs, key, hnode, 0); + + return cfs_hash_object(hs, hnode); +} +EXPORT_SYMBOL(cfs_hash_findadd_unique); + +/** + * Delete item @hnode from the libcfs hash @hs using @key. The @key + * is required to ensure the correct hash bucket is locked since there + * is no direct linkage from the item to the bucket. The object + * removed from the hash will be returned and obs->hs_put is called + * on the removed object. + */ +void * +cfs_hash_del(struct cfs_hash *hs, const void *key, struct hlist_node *hnode) +{ + void *obj = NULL; + int bits = 0; + struct cfs_hash_bd bds[2]; + + cfs_hash_lock(hs, 0); + cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1); + + /* NB: do nothing if @hnode is not in hash table */ + if (!hnode || !hlist_unhashed(hnode)) { + if (!bds[1].bd_bucket && hnode) { + cfs_hash_bd_del_locked(hs, &bds[0], hnode); + } else { + hnode = cfs_hash_dual_bd_finddel_locked(hs, bds, + key, hnode); + } + } + + if (hnode) { + obj = cfs_hash_object(hs, hnode); + bits = cfs_hash_rehash_bits(hs); + } + + cfs_hash_dual_bd_unlock(hs, bds, 1); + cfs_hash_unlock(hs, 0); + if (bits > 0) + cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs)); + + return obj; +} +EXPORT_SYMBOL(cfs_hash_del); + +/** + * Delete item given @key in libcfs hash @hs. The first @key found in + * the hash will be removed, if the key exists multiple times in the hash + * @hs this function must be called once per key. The removed object + * will be returned and ops->hs_put is called on the removed object. 
*/
+void *
+cfs_hash_del_key(struct cfs_hash *hs, const void *key)
+{
+	return cfs_hash_del(hs, key, NULL);
+}
+EXPORT_SYMBOL(cfs_hash_del_key);
+
+/**
+ * Lookup an item using @key in the libcfs hash @hs and return it.
+ * If the @key is found in the hash, hs->hs_get() is called and the
+ * matching object is returned. It is the caller's responsibility
+ * to call the counterpart ops->hs_put using the cfs_hash_put() macro
+ * when finished with the object. If the @key was not found
+ * in the hash @hs, NULL is returned.
+ */
+void *
+cfs_hash_lookup(struct cfs_hash *hs, const void *key)
+{
+	void *obj = NULL;
+	struct hlist_node *hnode;
+	struct cfs_hash_bd bds[2];
+
+	cfs_hash_lock(hs, 0);
+	cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0);
+
+	hnode = cfs_hash_dual_bd_lookup_locked(hs, bds, key);
+	if (hnode)
+		obj = cfs_hash_object(hs, hnode);
+
+	cfs_hash_dual_bd_unlock(hs, bds, 0);
+	cfs_hash_unlock(hs, 0);
+
+	return obj;
+}
+EXPORT_SYMBOL(cfs_hash_lookup);
+
+static void
+cfs_hash_for_each_enter(struct cfs_hash *hs)
+{
+	LASSERT(!cfs_hash_is_exiting(hs));
+
+	if (!cfs_hash_with_rehash(hs))
+		return;
+	/*
+	 * NB: there is a race on cfs_hash_t::hs_iterating, but it doesn't
+	 * matter because it's just an unreliable signal to the
+	 * rehash-thread, which will try to finish the rehash ASAP when
+	 * seeing this.
+	 */
+	hs->hs_iterating = 1;
+
+	cfs_hash_lock(hs, 1);
+	hs->hs_iterators++;
+
+	/* NB: iteration is mostly called by service threads; instead of
+	 * blocking a service thread we tend to cancel a pending rehash
+	 * request and relaunch it after the iteration
+	 */
+	if (cfs_hash_is_rehashing(hs))
+		cfs_hash_rehash_cancel_locked(hs);
+	cfs_hash_unlock(hs, 1);
+}
+
+static void
+cfs_hash_for_each_exit(struct cfs_hash *hs)
+{
+	int remained;
+	int bits;
+
+	if (!cfs_hash_with_rehash(hs))
+		return;
+	cfs_hash_lock(hs, 1);
+	remained = --hs->hs_iterators;
+	bits = cfs_hash_rehash_bits(hs);
+	cfs_hash_unlock(hs, 1);
+	/* NB: racy on cfs_hash_t::hs_iterating, see above */
+	if (remained == 0)
+		hs->hs_iterating = 0;
+	if (bits > 0) {
+		cfs_hash_rehash(hs, atomic_read(&hs->hs_count) <
+				    CFS_HASH_LOOP_HOG);
+	}
+}
+
+/**
+ * For each item in the libcfs hash @hs call the passed callback @func
+ * and pass to it as an argument each hash item and the private @data.
+ *
+ * a) the function may sleep!
+ * b) during the callback:
+ *    . the bucket lock is held so the callback must never sleep.
+ *    .
if @remove_safe is true, the user can remove the current
+ *      item with cfs_hash_bd_del_locked()
+ */
+static __u64
+cfs_hash_for_each_tight(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
+			void *data, int remove_safe)
+{
+	struct hlist_node *hnode;
+	struct hlist_node *pos;
+	struct cfs_hash_bd bd;
+	__u64 count = 0;
+	int excl = !!remove_safe;
+	int loop = 0;
+	int i;
+
+	cfs_hash_for_each_enter(hs);
+
+	cfs_hash_lock(hs, 0);
+	LASSERT(!cfs_hash_is_rehashing(hs));
+
+	cfs_hash_for_each_bucket(hs, &bd, i) {
+		struct hlist_head *hhead;
+
+		cfs_hash_bd_lock(hs, &bd, excl);
+		if (!func) { /* only glimpse size */
+			count += bd.bd_bucket->hsb_count;
+			cfs_hash_bd_unlock(hs, &bd, excl);
+			continue;
+		}
+
+		cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
+			hlist_for_each_safe(hnode, pos, hhead) {
+				cfs_hash_bucket_validate(hs, &bd, hnode);
+				count++;
+				loop++;
+				if (func(hs, &bd, hnode, data)) {
+					cfs_hash_bd_unlock(hs, &bd, excl);
+					goto out;
+				}
+			}
+		}
+		cfs_hash_bd_unlock(hs, &bd, excl);
+		if (loop < CFS_HASH_LOOP_HOG)
+			continue;
+		loop = 0;
+		cfs_hash_unlock(hs, 0);
+		cond_resched();
+		cfs_hash_lock(hs, 0);
+	}
+ out:
+	cfs_hash_unlock(hs, 0);
+
+	cfs_hash_for_each_exit(hs);
+	return count;
+}
+
+struct cfs_hash_cond_arg {
+	cfs_hash_cond_opt_cb_t	func;
+	void			*arg;
+};
+
+static int
+cfs_hash_cond_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+			 struct hlist_node *hnode, void *data)
+{
+	struct cfs_hash_cond_arg *cond = data;
+
+	if (cond->func(cfs_hash_object(hs, hnode), cond->arg))
+		cfs_hash_bd_del_locked(hs, bd, hnode);
+	return 0;
+}
+
+/**
+ * Delete items from the libcfs hash @hs for which @func returns true.
+ * The write lock is held while walking each bucket so that no object
+ * can gain a new reference while it is being removed.
+ */
+void
+cfs_hash_cond_del(struct cfs_hash *hs, cfs_hash_cond_opt_cb_t func, void *data)
+{
+	struct cfs_hash_cond_arg arg = {
+		.func	= func,
+		.arg	= data,
+	};
+
+	cfs_hash_for_each_tight(hs, cfs_hash_cond_del_locked, &arg, 1);
+}
+EXPORT_SYMBOL(cfs_hash_cond_del);
+
+void
+cfs_hash_for_each(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
+		  void *data)
+{
+	cfs_hash_for_each_tight(hs, func, data, 0);
+}
+EXPORT_SYMBOL(cfs_hash_for_each);
+
+void
+cfs_hash_for_each_safe(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
+		       void *data)
+{
+	cfs_hash_for_each_tight(hs, func, data, 1);
+}
+EXPORT_SYMBOL(cfs_hash_for_each_safe);
+
+static int
+cfs_hash_peek(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+	      struct hlist_node *hnode, void *data)
+{
+	*(int *)data = 0;
+	return 1; /* return 1 to break the loop */
+}
+
+int
+cfs_hash_is_empty(struct cfs_hash *hs)
+{
+	int empty = 1;
+
+	cfs_hash_for_each_tight(hs, cfs_hash_peek, &empty, 0);
+	return empty;
+}
+EXPORT_SYMBOL(cfs_hash_is_empty);
+
+__u64
+cfs_hash_size_get(struct cfs_hash *hs)
+{
+	return cfs_hash_with_counter(hs) ?
+	       atomic_read(&hs->hs_count) :
+	       cfs_hash_for_each_tight(hs, NULL, NULL, 0);
+}
+EXPORT_SYMBOL(cfs_hash_size_get);
+
+/*
+ * cfs_hash_for_each_relax:
+ * Iterate the hash table and call @func on each item without
+ * any lock.  This function can't guarantee to finish the iteration
+ * if these features are enabled:
+ *
+ * a. if rehash_key is enabled, an item can be moved from
+ *    one bucket to another bucket
+ * b. the user can remove a non-zero-ref item from the hash table,
+ *    so the item can disappear from the hash table; even worse,
+ *    the user may change the key and insert the item into another
+ *    hash bucket.
+ * There is no way for us to finish the iteration correctly in the
+ * previous two cases, so the iteration has to be stopped on change.
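+ * cfs_hash_for_each_nolock() below therefore rejects, with -EOPNOTSUPP,
+ * tables created with the rehash-key feature or without the no-itemref
+ * feature.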
+ */
+static int
+cfs_hash_for_each_relax(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
+			void *data)
+{
+	struct hlist_node *hnode;
+	struct hlist_node *tmp;
+	struct cfs_hash_bd bd;
+	__u32 version;
+	int count = 0;
+	int stop_on_change;
+	int rc = 0;	/* initialise: a bucket's hlist may be empty */
+	int i;
+
+	stop_on_change = cfs_hash_with_rehash_key(hs) ||
+			 !cfs_hash_with_no_itemref(hs) ||
+			 !hs->hs_ops->hs_put_locked;
+	cfs_hash_lock(hs, 0);
+	LASSERT(!cfs_hash_is_rehashing(hs));
+
+	cfs_hash_for_each_bucket(hs, &bd, i) {
+		struct hlist_head *hhead;
+
+		cfs_hash_bd_lock(hs, &bd, 0);
+		version = cfs_hash_bd_version_get(&bd);
+
+		cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
+			for (hnode = hhead->first; hnode;) {
+				cfs_hash_bucket_validate(hs, &bd, hnode);
+				cfs_hash_get(hs, hnode);
+				cfs_hash_bd_unlock(hs, &bd, 0);
+				cfs_hash_unlock(hs, 0);
+
+				rc = func(hs, &bd, hnode, data);
+				if (stop_on_change)
+					cfs_hash_put(hs, hnode);
+				cond_resched();
+				count++;
+
+				cfs_hash_lock(hs, 0);
+				cfs_hash_bd_lock(hs, &bd, 0);
+				if (!stop_on_change) {
+					tmp = hnode->next;
+					cfs_hash_put_locked(hs, hnode);
+					hnode = tmp;
+				} else { /* bucket changed? */
+					if (version !=
+					    cfs_hash_bd_version_get(&bd))
+						break;
+					/* safe to continue because no change */
+					hnode = hnode->next;
+				}
+				if (rc) /* callback wants to break iteration */
+					break;
+			}
+			if (rc) /* callback wants to break iteration */
+				break;
+		}
+		cfs_hash_bd_unlock(hs, &bd, 0);
+		if (rc) /* callback wants to break iteration */
+			break;
+	}
+	cfs_hash_unlock(hs, 0);
+
+	return count;
+}
+
+int
+cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
+			 void *data)
+{
+	if (cfs_hash_with_no_lock(hs) ||
+	    cfs_hash_with_rehash_key(hs) ||
+	    !cfs_hash_with_no_itemref(hs))
+		return -EOPNOTSUPP;
+
+	if (!hs->hs_ops->hs_get ||
+	    (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked))
+		return -EOPNOTSUPP;
+
+	cfs_hash_for_each_enter(hs);
+	cfs_hash_for_each_relax(hs, func, data);
+	cfs_hash_for_each_exit(hs);
+
+	return 0;
+}
+EXPORT_SYMBOL(cfs_hash_for_each_nolock);
+
+/**
+ * For each hash bucket in the libcfs hash @hs call the passed callback
+ * @func until all the hash buckets are empty.  The passed callback @func
+ * or the previously registered callback hs->hs_put must remove the item
+ * from the hash.  You may either use the cfs_hash_del() or hlist_del()
+ * functions.  No rwlocks will be held during the callback @func, so it
+ * is safe to sleep if needed.  This function will not terminate until
+ * the hash is empty.  Note it is still possible to concurrently add new
+ * items into the hash.  It is the caller's responsibility to ensure
+ * the required locking is in place to prevent concurrent insertions.
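+ *
+ * A minimal shutdown sketch (my_free_cb is an illustrative callback,
+ * not part of this API; per the rules above it must remove the item
+ * from the hash):
+ *
+ *	cfs_hash_for_each_empty(hs, my_free_cb, NULL);
+ *	cfs_hash_putref(hs);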
+ */
+int
+cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
+			void *data)
+{
+	unsigned i = 0;
+
+	if (cfs_hash_with_no_lock(hs))
+		return -EOPNOTSUPP;
+
+	if (!hs->hs_ops->hs_get ||
+	    (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked))
+		return -EOPNOTSUPP;
+
+	cfs_hash_for_each_enter(hs);
+	while (cfs_hash_for_each_relax(hs, func, data)) {
+		CDEBUG(D_INFO, "Try to empty hash: %s, loop: %u\n",
+		       hs->hs_name, i++);
+	}
+	cfs_hash_for_each_exit(hs);
+	return 0;
+}
+EXPORT_SYMBOL(cfs_hash_for_each_empty);
+
+void
+cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned hindex,
+			cfs_hash_for_each_cb_t func, void *data)
+{
+	struct hlist_head *hhead;
+	struct hlist_node *hnode;
+	struct cfs_hash_bd bd;
+
+	cfs_hash_for_each_enter(hs);
+	cfs_hash_lock(hs, 0);
+	if (hindex >= CFS_HASH_NHLIST(hs))
+		goto out;
+
+	cfs_hash_bd_index_set(hs, hindex, &bd);
+
+	cfs_hash_bd_lock(hs, &bd, 0);
+	hhead = cfs_hash_bd_hhead(hs, &bd);
+	hlist_for_each(hnode, hhead) {
+		if (func(hs, &bd, hnode, data))
+			break;
+	}
+	cfs_hash_bd_unlock(hs, &bd, 0);
+out:
+	cfs_hash_unlock(hs, 0);
+	cfs_hash_for_each_exit(hs);
+}
+EXPORT_SYMBOL(cfs_hash_hlist_for_each);
+
+/*
+ * For each item in the libcfs hash @hs which matches the @key call
+ * the passed callback @func and pass to it as an argument each hash
+ * item and the private @data.  During the callback the bucket lock
+ * is held so the callback must never sleep.
+ */
+void
+cfs_hash_for_each_key(struct cfs_hash *hs, const void *key,
+		      cfs_hash_for_each_cb_t func, void *data)
+{
+	struct hlist_node *hnode;
+	struct cfs_hash_bd bds[2];
+	unsigned int i;
+
+	cfs_hash_lock(hs, 0);
+
+	cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0);
+
+	cfs_hash_for_each_bd(bds, 2, i) {
+		struct hlist_head *hlist = cfs_hash_bd_hhead(hs, &bds[i]);
+
+		hlist_for_each(hnode, hlist) {
+			cfs_hash_bucket_validate(hs, &bds[i], hnode);
+
+			if (cfs_hash_keycmp(hs, key, hnode)) {
+				if (func(hs, &bds[i], hnode, data))
+					break;
+			}
+		}
+	}
+
+	cfs_hash_dual_bd_unlock(hs, bds, 0);
+	cfs_hash_unlock(hs, 0);
+}
+EXPORT_SYMBOL(cfs_hash_for_each_key);
+
+/**
+ * Rehash the libcfs hash @hs to the given @bits.  This can be used
+ * to grow the hash size when excessive chaining is detected, or to
+ * shrink the hash when it is larger than needed.  When the CFS_HASH_REHASH
+ * flag is set in @hs the libcfs hash may be dynamically rehashed
+ * during addition or removal if the hash's theta value falls below
+ * hs->hs_min_theta or exceeds hs->hs_max_theta.  By default
+ * these values are tuned to keep the chained hash depth small, and
+ * this approach assumes a reasonably uniform hashing function.  The
+ * theta thresholds for @hs are tunable via cfs_hash_set_theta().
+ */
+void
+cfs_hash_rehash_cancel_locked(struct cfs_hash *hs)
+{
+	int i;
+
+	/* the caller must hold cfs_hash_lock(hs, 1) */
+	LASSERT(cfs_hash_with_rehash(hs) &&
+		!cfs_hash_with_no_lock(hs));
+
+	if (!cfs_hash_is_rehashing(hs))
+		return;
+
+	if (cfs_wi_deschedule(cfs_sched_rehash, &hs->hs_rehash_wi)) {
+		hs->hs_rehash_bits = 0;
+		return;
+	}
+
+	for (i = 2; cfs_hash_is_rehashing(hs); i++) {
+		cfs_hash_unlock(hs, 1);
+		/* raise console warning while waiting too long */
+		CDEBUG(is_power_of_2(i >> 3) ?
D_WARNING : D_INFO, + "hash %s is still rehashing, rescheded %d\n", + hs->hs_name, i - 1); + cond_resched(); + cfs_hash_lock(hs, 1); + } +} + +void +cfs_hash_rehash_cancel(struct cfs_hash *hs) +{ + cfs_hash_lock(hs, 1); + cfs_hash_rehash_cancel_locked(hs); + cfs_hash_unlock(hs, 1); +} + +int +cfs_hash_rehash(struct cfs_hash *hs, int do_rehash) +{ + int rc; + + LASSERT(cfs_hash_with_rehash(hs) && !cfs_hash_with_no_lock(hs)); + + cfs_hash_lock(hs, 1); + + rc = cfs_hash_rehash_bits(hs); + if (rc <= 0) { + cfs_hash_unlock(hs, 1); + return rc; + } + + hs->hs_rehash_bits = rc; + if (!do_rehash) { + /* launch and return */ + cfs_wi_schedule(cfs_sched_rehash, &hs->hs_rehash_wi); + cfs_hash_unlock(hs, 1); + return 0; + } + + /* rehash right now */ + cfs_hash_unlock(hs, 1); + + return cfs_hash_rehash_worker(&hs->hs_rehash_wi); +} + +static int +cfs_hash_rehash_bd(struct cfs_hash *hs, struct cfs_hash_bd *old) +{ + struct cfs_hash_bd new; + struct hlist_head *hhead; + struct hlist_node *hnode; + struct hlist_node *pos; + void *key; + int c = 0; + + /* hold cfs_hash_lock(hs, 1), so don't need any bucket lock */ + cfs_hash_bd_for_each_hlist(hs, old, hhead) { + hlist_for_each_safe(hnode, pos, hhead) { + key = cfs_hash_key(hs, hnode); + LASSERT(key); + /* Validate hnode is in the correct bucket. */ + cfs_hash_bucket_validate(hs, old, hnode); + /* + * Delete from old hash bucket; move to new bucket. + * ops->hs_key must be defined. + */ + cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets, + hs->hs_rehash_bits, key, &new); + cfs_hash_bd_move_locked(hs, old, &new, hnode); + c++; + } + } + + return c; +} + +static int +cfs_hash_rehash_worker(cfs_workitem_t *wi) +{ + struct cfs_hash *hs = container_of(wi, struct cfs_hash, hs_rehash_wi); + struct cfs_hash_bucket **bkts; + struct cfs_hash_bd bd; + unsigned int old_size; + unsigned int new_size; + int bsize; + int count = 0; + int rc = 0; + int i; + + LASSERT(hs && cfs_hash_with_rehash(hs)); + + cfs_hash_lock(hs, 0); + LASSERT(cfs_hash_is_rehashing(hs)); + + old_size = CFS_HASH_NBKT(hs); + new_size = CFS_HASH_RH_NBKT(hs); + + cfs_hash_unlock(hs, 0); + + /* + * don't need hs::hs_rwlock for hs::hs_buckets, + * because nobody can change bkt-table except me. 
+ */ + bkts = cfs_hash_buckets_realloc(hs, hs->hs_buckets, + old_size, new_size); + cfs_hash_lock(hs, 1); + if (!bkts) { + rc = -ENOMEM; + goto out; + } + + if (bkts == hs->hs_buckets) { + bkts = NULL; /* do nothing */ + goto out; + } + + rc = __cfs_hash_theta(hs); + if ((rc >= hs->hs_min_theta) && (rc <= hs->hs_max_theta)) { + /* free the new allocated bkt-table */ + old_size = new_size; + new_size = CFS_HASH_NBKT(hs); + rc = -EALREADY; + goto out; + } + + LASSERT(!hs->hs_rehash_buckets); + hs->hs_rehash_buckets = bkts; + + rc = 0; + cfs_hash_for_each_bucket(hs, &bd, i) { + if (cfs_hash_is_exiting(hs)) { + rc = -ESRCH; + /* someone wants to destroy the hash, abort now */ + if (old_size < new_size) /* OK to free old bkt-table */ + break; + /* it's shrinking, need free new bkt-table */ + hs->hs_rehash_buckets = NULL; + old_size = new_size; + new_size = CFS_HASH_NBKT(hs); + goto out; + } + + count += cfs_hash_rehash_bd(hs, &bd); + if (count < CFS_HASH_LOOP_HOG || + cfs_hash_is_iterating(hs)) { /* need to finish ASAP */ + continue; + } + + count = 0; + cfs_hash_unlock(hs, 1); + cond_resched(); + cfs_hash_lock(hs, 1); + } + + hs->hs_rehash_count++; + + bkts = hs->hs_buckets; + hs->hs_buckets = hs->hs_rehash_buckets; + hs->hs_rehash_buckets = NULL; + + hs->hs_cur_bits = hs->hs_rehash_bits; +out: + hs->hs_rehash_bits = 0; + if (rc == -ESRCH) /* never be scheduled again */ + cfs_wi_exit(cfs_sched_rehash, wi); + bsize = cfs_hash_bkt_size(hs); + cfs_hash_unlock(hs, 1); + /* can't refer to @hs anymore because it could be destroyed */ + if (bkts) + cfs_hash_buckets_free(bkts, bsize, new_size, old_size); + if (rc != 0) + CDEBUG(D_INFO, "early quit of rehashing: %d\n", rc); + /* return 1 only if cfs_wi_exit is called */ + return rc == -ESRCH; +} + +/** + * Rehash the object referenced by @hnode in the libcfs hash @hs. The + * @old_key must be provided to locate the objects previous location + * in the hash, and the @new_key will be used to reinsert the object. + * Use this function instead of a cfs_hash_add() + cfs_hash_del() + * combo when it is critical that there is no window in time where the + * object is missing from the hash. When an object is being rehashed + * the registered cfs_hash_get() and cfs_hash_put() functions will + * not be called. 
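+ *
+ * A minimal call sketch (obj, o_hnode, old_id and new_id are
+ * illustrative names, not part of this API):
+ *
+ *	cfs_hash_rehash_key(hs, &old_id, &new_id, &obj->o_hnode);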
+ */
+void cfs_hash_rehash_key(struct cfs_hash *hs, const void *old_key,
+			 void *new_key, struct hlist_node *hnode)
+{
+	struct cfs_hash_bd bds[3];
+	struct cfs_hash_bd old_bds[2];
+	struct cfs_hash_bd new_bd;
+
+	LASSERT(!hlist_unhashed(hnode));
+
+	cfs_hash_lock(hs, 0);
+
+	cfs_hash_dual_bd_get(hs, old_key, old_bds);
+	cfs_hash_bd_get(hs, new_key, &new_bd);
+
+	bds[0] = old_bds[0];
+	bds[1] = old_bds[1];
+	bds[2] = new_bd;
+
+	/* NB: bds[0] and bds[1] are ordered already */
+	cfs_hash_bd_order(&bds[1], &bds[2]);
+	cfs_hash_bd_order(&bds[0], &bds[1]);
+
+	cfs_hash_multi_bd_lock(hs, bds, 3, 1);
+	if (likely(!old_bds[1].bd_bucket)) {
+		cfs_hash_bd_move_locked(hs, &old_bds[0], &new_bd, hnode);
+	} else {
+		cfs_hash_dual_bd_finddel_locked(hs, old_bds, old_key, hnode);
+		cfs_hash_bd_add_locked(hs, &new_bd, hnode);
+	}
+	/* overwrite key inside locks, otherwise may screw up with
+	 * other operations, i.e: rehash
+	 */
+	cfs_hash_keycpy(hs, hnode, new_key);
+
+	cfs_hash_multi_bd_unlock(hs, bds, 3, 1);
+	cfs_hash_unlock(hs, 0);
+}
+EXPORT_SYMBOL(cfs_hash_rehash_key);
+
+void cfs_hash_debug_header(struct seq_file *m)
+{
+	seq_printf(m, "%-*s cur min max theta t-min t-max flags rehash count maxdep maxdepb distribution\n",
+		   CFS_HASH_BIGNAME_LEN, "name");
+}
+EXPORT_SYMBOL(cfs_hash_debug_header);
+
+static struct cfs_hash_bucket **
+cfs_hash_full_bkts(struct cfs_hash *hs)
+{
+	/* NB: caller should hold hs->hs_rwlock if REHASH is set */
+	if (!hs->hs_rehash_buckets)
+		return hs->hs_buckets;
+
+	LASSERT(hs->hs_rehash_bits != 0);
+	return hs->hs_rehash_bits > hs->hs_cur_bits ?
+	       hs->hs_rehash_buckets : hs->hs_buckets;
+}
+
+static unsigned int
+cfs_hash_full_nbkt(struct cfs_hash *hs)
+{
+	/* NB: caller should hold hs->hs_rwlock if REHASH is set */
+	if (!hs->hs_rehash_buckets)
+		return CFS_HASH_NBKT(hs);
+
+	LASSERT(hs->hs_rehash_bits != 0);
+	return hs->hs_rehash_bits > hs->hs_cur_bits ?
+	       CFS_HASH_RH_NBKT(hs) : CFS_HASH_NBKT(hs);
+}
+
+void cfs_hash_debug_str(struct cfs_hash *hs, struct seq_file *m)
+{
+	int dist[8] = { 0, };
+	int maxdep = -1;
+	int maxdepb = -1;
+	int total = 0;
+	int theta;
+	int i;
+
+	cfs_hash_lock(hs, 0);
+	theta = __cfs_hash_theta(hs);
+
+	seq_printf(m, "%-*s %5d %5d %5d %d.%03d %d.%03d %d.%03d 0x%02x %6d ",
+		   CFS_HASH_BIGNAME_LEN, hs->hs_name,
+		   1 << hs->hs_cur_bits, 1 << hs->hs_min_bits,
+		   1 << hs->hs_max_bits,
+		   __cfs_hash_theta_int(theta), __cfs_hash_theta_frac(theta),
+		   __cfs_hash_theta_int(hs->hs_min_theta),
+		   __cfs_hash_theta_frac(hs->hs_min_theta),
+		   __cfs_hash_theta_int(hs->hs_max_theta),
+		   __cfs_hash_theta_frac(hs->hs_max_theta),
+		   hs->hs_flags, hs->hs_rehash_count);
+
+	/*
+	 * The distribution is a summary of the chained hash depth in
+	 * each of the libcfs hash buckets.  Each bucket's hsb_count is
+	 * divided by the hash theta value and used to generate a
+	 * histogram of the hash distribution.  A uniform hash will
+	 * result in all hash buckets being close to the average thus
+	 * only the first few entries in the histogram will be non-zero.
+	 * If your hash function results in a non-uniform hash this will
+	 * be observable as outlier buckets in the distribution histogram.
+ * + * Uniform hash distribution: 128/128/0/0/0/0/0/0 + * Non-Uniform hash distribution: 128/125/0/0/0/0/2/1 + */ + for (i = 0; i < cfs_hash_full_nbkt(hs); i++) { + struct cfs_hash_bd bd; + + bd.bd_bucket = cfs_hash_full_bkts(hs)[i]; + cfs_hash_bd_lock(hs, &bd, 0); + if (maxdep < bd.bd_bucket->hsb_depmax) { + maxdep = bd.bd_bucket->hsb_depmax; + maxdepb = ffz(~maxdep); + } + total += bd.bd_bucket->hsb_count; + dist[min(fls(bd.bd_bucket->hsb_count / max(theta, 1)), 7)]++; + cfs_hash_bd_unlock(hs, &bd, 0); + } + + seq_printf(m, "%7d %7d %7d ", total, maxdep, maxdepb); + for (i = 0; i < 8; i++) + seq_printf(m, "%d%c", dist[i], (i == 7) ? '\n' : '/'); + + cfs_hash_unlock(hs, 0); +} +EXPORT_SYMBOL(cfs_hash_debug_str); diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c new file mode 100644 index 000000000..33352af6c --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c @@ -0,0 +1,227 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * GPL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ *
+ * Please see comments in libcfs/include/libcfs/libcfs_cpu.h for introduction
+ *
+ * Author: liang@whamcloud.com
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+/** Global CPU partition table */
+struct cfs_cpt_table *cfs_cpt_table __read_mostly;
+EXPORT_SYMBOL(cfs_cpt_table);
+
+#ifndef HAVE_LIBCFS_CPT
+
+#define CFS_CPU_VERSION_MAGIC 0xbabecafe
+
+struct cfs_cpt_table *
+cfs_cpt_table_alloc(unsigned int ncpt)
+{
+	struct cfs_cpt_table *cptab;
+
+	if (ncpt != 1) {
+		CERROR("Can't support cpu partition number %d\n", ncpt);
+		return NULL;
+	}
+
+	LIBCFS_ALLOC(cptab, sizeof(*cptab));
+	if (cptab) {
+		cptab->ctb_version = CFS_CPU_VERSION_MAGIC;
+		node_set(0, cptab->ctb_nodemask);
+		cptab->ctb_nparts = ncpt;
+	}
+
+	return cptab;
+}
+EXPORT_SYMBOL(cfs_cpt_table_alloc);
+
+void
+cfs_cpt_table_free(struct cfs_cpt_table *cptab)
+{
+	LASSERT(cptab->ctb_version == CFS_CPU_VERSION_MAGIC);
+
+	LIBCFS_FREE(cptab, sizeof(*cptab));
+}
+EXPORT_SYMBOL(cfs_cpt_table_free);
+
+#ifdef CONFIG_SMP
+int
+cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
+{
+	int rc;
+
+	rc = snprintf(buf, len, "%d\t: %d\n", 0, 0);
+	len -= rc;
+	if (len <= 0)
+		return -EFBIG;
+
+	return rc;
+}
+EXPORT_SYMBOL(cfs_cpt_table_print);
+#endif /* CONFIG_SMP */
+
+int
+cfs_cpt_number(struct cfs_cpt_table *cptab)
+{
+	return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_number);
+
+int
+cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
+{
+	return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_weight);
+
+int
+cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
+{
+	return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_online);
+
+nodemask_t *
+cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
+{
+	return &cptab->ctb_nodemask;
+}
+EXPORT_SYMBOL(cfs_cpt_nodemask);
+
+int
+cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+	return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_cpu);
+
+void
+cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+}
+EXPORT_SYMBOL(cfs_cpt_unset_cpu);
+
+int
+cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
+{
+	return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_cpumask);
+
+void
+cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
+{
+}
+EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
+
+int
+cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
+{
+	return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_node);
+
+void
+cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
+{
+}
+EXPORT_SYMBOL(cfs_cpt_unset_node);
+
+int
+cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
+{
+	return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_nodemask);
+
+void
+cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
+{
+}
+EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
+
+void
+cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
+{
+}
+EXPORT_SYMBOL(cfs_cpt_clear);
+
+int
+cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
+{
+	return 0;
+}
+EXPORT_SYMBOL(cfs_cpt_spread_node);
+
+int
+cfs_cpu_ht_nsiblings(int cpu)
+{
+	return 1;
+}
+EXPORT_SYMBOL(cfs_cpu_ht_nsiblings);
+
+int
+cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
+{
+	return 0;
+}
+EXPORT_SYMBOL(cfs_cpt_current);
+
+int
+cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
+{
+	return 0;
+}
+EXPORT_SYMBOL(cfs_cpt_of_cpu);
+
+int
+cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
+{
+	return 0;
+}
+EXPORT_SYMBOL(cfs_cpt_bind);
+
+void
+cfs_cpu_fini(void)
+{
+	if (cfs_cpt_table) {
+		cfs_cpt_table_free(cfs_cpt_table);
+		cfs_cpt_table =
NULL;
+	}
+}
+
+int
+cfs_cpu_init(void)
+{
+	cfs_cpt_table = cfs_cpt_table_alloc(1);
+
+	return cfs_cpt_table ? 0 : -1;
+}
+
+#endif /* HAVE_LIBCFS_CPT */
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c b/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c
new file mode 100644
index 000000000..2de9eeae0
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c
@@ -0,0 +1,185 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * GPL HEADER END
+ */
+/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2015 Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Author: liang@whamcloud.com
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+/** destroy cpu-partition lock, see libcfs_private.h for more detail */
+void
+cfs_percpt_lock_free(struct cfs_percpt_lock *pcl)
+{
+	LASSERT(pcl->pcl_locks);
+	LASSERT(!pcl->pcl_locked);
+
+	cfs_percpt_free(pcl->pcl_locks);
+	LIBCFS_FREE(pcl, sizeof(*pcl));
+}
+EXPORT_SYMBOL(cfs_percpt_lock_free);
+
+/**
+ * create cpu-partition lock, see libcfs_private.h for more detail.
+ *
+ * cpu-partition lock is designed for large-scale SMP systems, so we
+ * need to reduce cacheline conflicts as much as we can; that's the
+ * reason we always allocate cacheline-aligned memory blocks.
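+ *
+ * A minimal usage sketch (illustrative only):
+ *
+ *	struct cfs_percpt_lock *pcl = cfs_percpt_lock_alloc(cptab);
+ *
+ *	cfs_percpt_lock(pcl, cpt);	/* private lock of partition cpt */
+ *	/* ... touch per-partition state ... */
+ *	cfs_percpt_unlock(pcl, cpt);
+ *
+ *	cfs_percpt_lock(pcl, CFS_PERCPT_LOCK_EX); /* all partitions */
+ *	cfs_percpt_unlock(pcl, CFS_PERCPT_LOCK_EX);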
+ */ +struct cfs_percpt_lock * +cfs_percpt_lock_alloc(struct cfs_cpt_table *cptab) +{ + struct cfs_percpt_lock *pcl; + spinlock_t *lock; + int i; + + /* NB: cptab can be NULL, pcl will be for HW CPUs on that case */ + LIBCFS_ALLOC(pcl, sizeof(*pcl)); + if (!pcl) + return NULL; + + pcl->pcl_cptab = cptab; + pcl->pcl_locks = cfs_percpt_alloc(cptab, sizeof(*lock)); + if (!pcl->pcl_locks) { + LIBCFS_FREE(pcl, sizeof(*pcl)); + return NULL; + } + + cfs_percpt_for_each(lock, i, pcl->pcl_locks) + spin_lock_init(lock); + + return pcl; +} +EXPORT_SYMBOL(cfs_percpt_lock_alloc); + +/** + * lock a CPU partition + * + * \a index != CFS_PERCPT_LOCK_EX + * hold private lock indexed by \a index + * + * \a index == CFS_PERCPT_LOCK_EX + * exclusively lock @pcl and nobody can take private lock + */ +void +cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index) + __acquires(pcl->pcl_locks) +{ + int ncpt = cfs_cpt_number(pcl->pcl_cptab); + int i; + + LASSERT(index >= CFS_PERCPT_LOCK_EX && index < ncpt); + + if (ncpt == 1) { + index = 0; + } else { /* serialize with exclusive lock */ + while (pcl->pcl_locked) + cpu_relax(); + } + + if (likely(index != CFS_PERCPT_LOCK_EX)) { + spin_lock(pcl->pcl_locks[index]); + return; + } + + /* exclusive lock request */ + for (i = 0; i < ncpt; i++) { + spin_lock(pcl->pcl_locks[i]); + if (i == 0) { + LASSERT(!pcl->pcl_locked); + /* nobody should take private lock after this + * so I wouldn't starve for too long time + */ + pcl->pcl_locked = 1; + } + } +} +EXPORT_SYMBOL(cfs_percpt_lock); + +/** unlock a CPU partition */ +void +cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index) + __releases(pcl->pcl_locks) +{ + int ncpt = cfs_cpt_number(pcl->pcl_cptab); + int i; + + index = ncpt == 1 ? 0 : index; + + if (likely(index != CFS_PERCPT_LOCK_EX)) { + spin_unlock(pcl->pcl_locks[index]); + return; + } + + for (i = ncpt - 1; i >= 0; i--) { + if (i == 0) { + LASSERT(pcl->pcl_locked); + pcl->pcl_locked = 0; + } + spin_unlock(pcl->pcl_locks[i]); + } +} +EXPORT_SYMBOL(cfs_percpt_unlock); + +/** free cpu-partition refcount */ +void +cfs_percpt_atomic_free(atomic_t **refs) +{ + cfs_percpt_free(refs); +} +EXPORT_SYMBOL(cfs_percpt_atomic_free); + +/** allocate cpu-partition refcount with initial value @init_val */ +atomic_t ** +cfs_percpt_atomic_alloc(struct cfs_cpt_table *cptab, int init_val) +{ + atomic_t **refs; + atomic_t *ref; + int i; + + refs = cfs_percpt_alloc(cptab, sizeof(*ref)); + if (!refs) + return NULL; + + cfs_percpt_for_each(ref, i, refs) + atomic_set(ref, init_val); + return refs; +} +EXPORT_SYMBOL(cfs_percpt_atomic_alloc); + +/** return sum of cpu-partition refs */ +int +cfs_percpt_atomic_summary(atomic_t **refs) +{ + atomic_t *ref; + int i; + int val = 0; + + cfs_percpt_for_each(ref, i, refs) + val += atomic_read(ref); + + return val; +} +EXPORT_SYMBOL(cfs_percpt_atomic_summary); diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c b/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c new file mode 100644 index 000000000..c5a695151 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c @@ -0,0 +1,196 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * GPL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * Author: liang@whamcloud.com + */ + +#define DEBUG_SUBSYSTEM S_LNET + +#include "../../include/linux/libcfs/libcfs.h" + +struct cfs_var_array { + unsigned int va_count; /* # of buffers */ + unsigned int va_size; /* size of each var */ + struct cfs_cpt_table *va_cptab; /* cpu partition table */ + void *va_ptrs[0]; /* buffer addresses */ +}; + +/* + * free per-cpu data, see more detail in cfs_percpt_free + */ +void +cfs_percpt_free(void *vars) +{ + struct cfs_var_array *arr; + int i; + + arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); + + for (i = 0; i < arr->va_count; i++) { + if (arr->va_ptrs[i]) + LIBCFS_FREE(arr->va_ptrs[i], arr->va_size); + } + + LIBCFS_FREE(arr, offsetof(struct cfs_var_array, + va_ptrs[arr->va_count])); +} +EXPORT_SYMBOL(cfs_percpt_free); + +/* + * allocate per cpu-partition variables, returned value is an array of pointers, + * variable can be indexed by CPU partition ID, i.e: + * + * arr = cfs_percpt_alloc(cfs_cpu_pt, size); + * then caller can access memory block for CPU 0 by arr[0], + * memory block for CPU 1 by arr[1]... + * memory block for CPU N by arr[N]... + * + * cacheline aligned. + */ +void * +cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size) +{ + struct cfs_var_array *arr; + int count; + int i; + + count = cfs_cpt_number(cptab); + + LIBCFS_ALLOC(arr, offsetof(struct cfs_var_array, va_ptrs[count])); + if (!arr) + return NULL; + + size = L1_CACHE_ALIGN(size); + arr->va_size = size; + arr->va_count = count; + arr->va_cptab = cptab; + + for (i = 0; i < count; i++) { + LIBCFS_CPT_ALLOC(arr->va_ptrs[i], cptab, i, size); + if (!arr->va_ptrs[i]) { + cfs_percpt_free((void *)&arr->va_ptrs[0]); + return NULL; + } + } + + return (void *)&arr->va_ptrs[0]; +} +EXPORT_SYMBOL(cfs_percpt_alloc); + +/* + * return number of CPUs (or number of elements in per-cpu data) + * according to cptab of @vars + */ +int +cfs_percpt_number(void *vars) +{ + struct cfs_var_array *arr; + + arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); + + return arr->va_count; +} +EXPORT_SYMBOL(cfs_percpt_number); + +/* + * return memory block shadowed from current CPU + */ +void * +cfs_percpt_current(void *vars) +{ + struct cfs_var_array *arr; + int cpt; + + arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); + cpt = cfs_cpt_current(arr->va_cptab, 0); + if (cpt < 0) + return NULL; + + return arr->va_ptrs[cpt]; +} + +void * +cfs_percpt_index(void *vars, int idx) +{ + struct cfs_var_array *arr; + + arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); + + LASSERT(idx >= 0 && idx < arr->va_count); + return arr->va_ptrs[idx]; +} + +/* + * free variable array, see more detail in cfs_array_alloc + */ +void +cfs_array_free(void *vars) +{ + struct cfs_var_array *arr; + int i; + + arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); + + for (i = 0; i < arr->va_count; i++) { + if (!arr->va_ptrs[i]) + continue; + + LIBCFS_FREE(arr->va_ptrs[i], arr->va_size); 
+ } + LIBCFS_FREE(arr, offsetof(struct cfs_var_array, + va_ptrs[arr->va_count])); +} +EXPORT_SYMBOL(cfs_array_free); + +/* + * allocate a variable array, returned value is an array of pointers. + * Caller can specify length of array by @count, @size is size of each + * memory block in array. + */ +void * +cfs_array_alloc(int count, unsigned int size) +{ + struct cfs_var_array *arr; + int i; + + LIBCFS_ALLOC(arr, offsetof(struct cfs_var_array, va_ptrs[count])); + if (!arr) + return NULL; + + arr->va_count = count; + arr->va_size = size; + + for (i = 0; i < count; i++) { + LIBCFS_ALLOC(arr->va_ptrs[i], size); + + if (!arr->va_ptrs[i]) { + cfs_array_free((void *)&arr->va_ptrs[0]); + return NULL; + } + } + + return (void *)&arr->va_ptrs[0]; +} +EXPORT_SYMBOL(cfs_array_alloc); diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c new file mode 100644 index 000000000..50ac1536d --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c @@ -0,0 +1,581 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, 2015 Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * String manipulation functions. + * + * libcfs/libcfs/libcfs_string.c + * + * Author: Nathan Rutman <nathan.rutman@sun.com> + */ + +#include "../../include/linux/libcfs/libcfs.h" + +/* Convert a text string to a bitmask */ +int cfs_str2mask(const char *str, const char *(*bit2str)(int bit), + int *oldmask, int minmask, int allmask) +{ + const char *debugstr; + char op = '\0'; + int newmask = minmask, i, len, found = 0; + + /* <str> must be a list of tokens separated by whitespace + * and optionally an operator ('+' or '-'). If an operator + * appears first in <str>, '*oldmask' is used as the starting point + * (relative), otherwise minmask is used (absolute). An operator + * applies to all following tokens up to the next operator. 
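+	 * e.g. with a debug-style bit2str(): "+trace -malloc" starts from
+	 * *oldmask, sets the "trace" bit and clears the "malloc" bit,
+	 * while "trace net" is absolute: minmask plus those two bits.
+	 * The token "all" selects allmask ("-all" resets to minmask).
+	 * (The token names are illustrative; the real set comes from
+	 * the bit2str() callback.)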
+	 */
+	while (*str != '\0') {
+		while (isspace(*str))
+			str++;
+		if (*str == '\0')
+			break;
+		if (*str == '+' || *str == '-') {
+			op = *str++;
+			if (!found)
+				/* only if first token is relative */
+				newmask = *oldmask;
+			while (isspace(*str))
+				str++;
+			if (*str == '\0') /* trailing op */
+				return -EINVAL;
+		}
+
+		/* find token length */
+		len = 0;
+		while (str[len] != '\0' && !isspace(str[len]) &&
+		       str[len] != '+' && str[len] != '-')
+			len++;
+
+		/* match token */
+		found = 0;
+		for (i = 0; i < 32; i++) {
+			debugstr = bit2str(i);
+			if (debugstr && strlen(debugstr) == len &&
+			    strncasecmp(str, debugstr, len) == 0) {
+				if (op == '-')
+					newmask &= ~(1 << i);
+				else
+					newmask |= (1 << i);
+				found = 1;
+				break;
+			}
+		}
+		if (!found && len == 3 &&
+		    (strncasecmp(str, "ALL", len) == 0)) {
+			if (op == '-')
+				newmask = minmask;
+			else
+				newmask = allmask;
+			found = 1;
+		}
+		if (!found) {
+			CWARN("unknown mask '%.*s'.\n"
+			      "mask usage: [+|-]<all|type> ...\n", len, str);
+			return -EINVAL;
+		}
+		str += len;
+	}
+
+	*oldmask = newmask;
+	return 0;
+}
+
+/* get the first string out of @str */
+char *cfs_firststr(char *str, size_t size)
+{
+	size_t i = 0;
+	char *end;
+
+	/* trim leading spaces */
+	while (i < size && *str && isspace(*str)) {
+		++i;
+		++str;
+	}
+
+	/* string with all spaces */
+	if (*str == '\0')
+		goto out;
+
+	end = str;
+	while (i < size && *end != '\0' && !isspace(*end)) {
+		++i;
+		++end;
+	}
+
+	*end = '\0';
+out:
+	return str;
+}
+EXPORT_SYMBOL(cfs_firststr);
+
+char *
+cfs_trimwhite(char *str)
+{
+	char *end;
+
+	while (isspace(*str))
+		str++;
+
+	end = str + strlen(str);
+	while (end > str) {
+		if (!isspace(end[-1]))
+			break;
+		end--;
+	}
+
+	*end = 0;
+	return str;
+}
+EXPORT_SYMBOL(cfs_trimwhite);
+
+/**
+ * Extracts tokens from strings.
+ *
+ * Looks for \a delim in string \a next, sets \a res to point to
+ * substring before the delimiter, sets \a next right after the found
+ * delimiter.
+ *
+ * \retval 1 if \a res points to a string of non-whitespace characters
+ * \retval 0 otherwise
+ */
+int
+cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res)
+{
+	char *end;
+
+	if (!next->ls_str)
+		return 0;
+
+	/* skip leading white spaces */
+	while (next->ls_len) {
+		if (!isspace(*next->ls_str))
+			break;
+		next->ls_str++;
+		next->ls_len--;
+	}
+
+	if (next->ls_len == 0) /* whitespaces only */
+		return 0;
+
+	if (*next->ls_str == delim) {
+		/* first non-whitespace is the delimiter */
+		return 0;
+	}
+
+	res->ls_str = next->ls_str;
+	end = memchr(next->ls_str, delim, next->ls_len);
+	if (!end) {
+		/* there is no delimiter in the string */
+		end = next->ls_str + next->ls_len;
+		next->ls_str = NULL;
+	} else {
+		next->ls_str = end + 1;
+		next->ls_len -= (end - res->ls_str + 1);
+	}
+
+	/* skip ending whitespaces */
+	while (--end != res->ls_str) {
+		if (!isspace(*end))
+			break;
+	}
+
+	res->ls_len = end - res->ls_str + 1;
+	return 1;
+}
+EXPORT_SYMBOL(cfs_gettok);
+
+/**
+ * Converts string to integer.
+ *
+ * Accepts decimal number strings.
+ *
+ * \retval 1 if the first \a nob chars of \a str convert to a decimal
+ * integer in the range [\a min, \a max]
+ * \retval 0 otherwise
+ */
+int
+cfs_str2num_check(char *str, int nob, unsigned *num,
+		  unsigned min, unsigned max)
+{
+	bool all_numbers = true;
+	char *endp, cache;
+	int rc;
+
+	str = cfs_trimwhite(str);
+
+	/**
+	 * kstrtouint can only handle strings composed
+	 * of only numbers.
We need to scan the string
+	 * passed in for the first non-digit character
+	 * and end the string at that location.  If we
+	 * don't find any non-digit character we still
+	 * need to place a '\0' at position nob since
+	 * we are not interested in the rest of the
+	 * string which is longer than nob in size.
+	 * After we are done the character at the
+	 * position we placed '\0' must be restored.
+	 */
+	for (endp = str; endp < str + nob; endp++) {
+		if (!isdigit(*endp)) {
+			all_numbers = false;
+			break;
+		}
+	}
+	cache = *endp;
+	*endp = '\0';
+
+	rc = kstrtouint(str, 10, num);
+	*endp = cache;
+	if (rc || !all_numbers)
+		return 0;
+
+	return (*num >= min && *num <= max);
+}
+EXPORT_SYMBOL(cfs_str2num_check);
+
+/**
+ * Parses \<range_expr\> token of the syntax. If \a bracketed is false,
+ * \a src should only have a single token which can be \<number\> or \*
+ *
+ * \retval 0 if \a src parses to
+ * \<number\> |
+ * \<number\> '-' \<number\> |
+ * \<number\> '-' \<number\> '/' \<number\>
+ * and \a expr is set to the allocated range_expr with
+ * range_expr::re_lo, range_expr::re_hi and range_expr::re_stride
+ * initialized
+ * \retval -EINVAL or -ENOMEM otherwise
+ */
+static int
+cfs_range_expr_parse(struct cfs_lstr *src, unsigned min, unsigned max,
+		     int bracketed, struct cfs_range_expr **expr)
+{
+	struct cfs_range_expr *re;
+	struct cfs_lstr tok;
+
+	LIBCFS_ALLOC(re, sizeof(*re));
+	if (!re)
+		return -ENOMEM;
+
+	if (src->ls_len == 1 && src->ls_str[0] == '*') {
+		re->re_lo = min;
+		re->re_hi = max;
+		re->re_stride = 1;
+		goto out;
+	}
+
+	if (cfs_str2num_check(src->ls_str, src->ls_len,
+			      &re->re_lo, min, max)) {
+		/* <number> is parsed */
+		re->re_hi = re->re_lo;
+		re->re_stride = 1;
+		goto out;
+	}
+
+	if (!bracketed || !cfs_gettok(src, '-', &tok))
+		goto failed;
+
+	if (!cfs_str2num_check(tok.ls_str, tok.ls_len,
+			       &re->re_lo, min, max))
+		goto failed;
+
+	/* <number> - */
+	if (cfs_str2num_check(src->ls_str, src->ls_len,
+			      &re->re_hi, min, max)) {
+		/* <number> - <number> is parsed */
+		re->re_stride = 1;
+		goto out;
+	}
+
+	/* go to check <number> '-' <number> '/' <number> */
+	if (cfs_gettok(src, '/', &tok)) {
+		if (!cfs_str2num_check(tok.ls_str, tok.ls_len,
+				       &re->re_hi, min, max))
+			goto failed;
+
+		/* <number> - <number> / ... */
+		if (cfs_str2num_check(src->ls_str, src->ls_len,
+				      &re->re_stride, min, max)) {
+			/* <number> - <number> / <number> is parsed */
+			goto out;
+		}
+	}
+
+ out:
+	*expr = re;
+	return 0;
+
+ failed:
+	LIBCFS_FREE(re, sizeof(*re));
+	return -EINVAL;
+}
+
+/**
+ * Print the range expression \a expr into specified \a buffer.
+ * If \a bracketed is true, expression does not need additional
+ * brackets.
+ *
+ * \retval number of characters written
+ */
+static int
+cfs_range_expr_print(char *buffer, int count, struct cfs_range_expr *expr,
+		     bool bracketed)
+{
+	int i;
+	char s[] = "[";
+	char e[] = "]";
+
+	if (bracketed) {
+		s[0] = '\0';
+		e[0] = '\0';
+	}
+
+	if (expr->re_lo == expr->re_hi)
+		i = scnprintf(buffer, count, "%u", expr->re_lo);
+	else if (expr->re_stride == 1)
+		i = scnprintf(buffer, count, "%s%u-%u%s",
+			      s, expr->re_lo, expr->re_hi, e);
+	else
+		i = scnprintf(buffer, count, "%s%u-%u/%u%s",
+			      s, expr->re_lo, expr->re_hi, expr->re_stride, e);
+	return i;
+}
+
+/**
+ * Print a list of range expressions (\a expr_list) into specified \a buffer.
+ * If the list contains several expressions, separate them with comma
+ * and surround the list with brackets.
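+ * e.g. expressions covering 0-7 by 2 plus the single value 10 print
+ * as "[0-7/2,10]"; a lone range prints with its own brackets,
+ * "[0-7/2]", and a lone number prints bare, "10".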
+ *
+ * \retval number of characters written
+ */
+int
+cfs_expr_list_print(char *buffer, int count, struct cfs_expr_list *expr_list)
+{
+	struct cfs_range_expr *expr;
+	int i = 0, j = 0;
+	int numexprs = 0;
+
+	if (count <= 0)
+		return 0;
+
+	list_for_each_entry(expr, &expr_list->el_exprs, re_link)
+		numexprs++;
+
+	if (numexprs > 1)
+		i += scnprintf(buffer + i, count - i, "[");
+
+	list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
+		if (j++ != 0)
+			i += scnprintf(buffer + i, count - i, ",");
+		i += cfs_range_expr_print(buffer + i, count - i, expr,
+					  numexprs > 1);
+	}
+
+	if (numexprs > 1)
+		i += scnprintf(buffer + i, count - i, "]");
+
+	return i;
+}
+EXPORT_SYMBOL(cfs_expr_list_print);
+
+/**
+ * Matches value (\a value) against ranges expression list \a expr_list.
+ *
+ * \retval 1 if \a value matches
+ * \retval 0 otherwise
+ */
+int
+cfs_expr_list_match(__u32 value, struct cfs_expr_list *expr_list)
+{
+	struct cfs_range_expr *expr;
+
+	list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
+		if (value >= expr->re_lo && value <= expr->re_hi &&
+		    ((value - expr->re_lo) % expr->re_stride) == 0)
+			return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(cfs_expr_list_match);
+
+/**
+ * Convert expression list (\a expr_list) to an array of all matched values
+ *
+ * \retval N N is total number of all matched values
+ * \retval 0 if expression list is empty
+ * \retval < 0 for failure
+ */
+int
+cfs_expr_list_values(struct cfs_expr_list *expr_list, int max, __u32 **valpp)
+{
+	struct cfs_range_expr *expr;
+	__u32 *val;
+	int count = 0;
+	int i;
+
+	list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
+		for (i = expr->re_lo; i <= expr->re_hi; i++) {
+			if (((i - expr->re_lo) % expr->re_stride) == 0)
+				count++;
+		}
+	}
+
+	if (count == 0) /* empty expression list */
+		return 0;
+
+	if (count > max) {
+		CERROR("Number of values %d exceeds max allowed %d\n",
+		       count, max);
+		return -EINVAL;
+	}
+
+	LIBCFS_ALLOC(val, sizeof(val[0]) * count);
+	if (!val)
+		return -ENOMEM;
+
+	count = 0;
+	list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
+		for (i = expr->re_lo; i <= expr->re_hi; i++) {
+			if (((i - expr->re_lo) % expr->re_stride) == 0)
+				val[count++] = i;
+		}
+	}
+
+	*valpp = val;
+	return count;
+}
+EXPORT_SYMBOL(cfs_expr_list_values);
+
+/**
+ * Frees cfs_range_expr structures of \a expr_list.
+ *
+ * \retval none
+ */
+void
+cfs_expr_list_free(struct cfs_expr_list *expr_list)
+{
+	while (!list_empty(&expr_list->el_exprs)) {
+		struct cfs_range_expr *expr;
+
+		expr = list_entry(expr_list->el_exprs.next,
+				  struct cfs_range_expr, re_link);
+		list_del(&expr->re_link);
+		LIBCFS_FREE(expr, sizeof(*expr));
+	}
+
+	LIBCFS_FREE(expr_list, sizeof(*expr_list));
+}
+EXPORT_SYMBOL(cfs_expr_list_free);
+
+/**
+ * Parses \<cfs_expr_list\> token of the syntax.
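+ * e.g. "512", "[2-10]", "[0-7/2,10]" and "[*]" are all accepted
+ * forms; every value is validated against the [\a min, \a max]
+ * bounds.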
+ * + * \retval 0 if \a str parses to \<number\> | \<expr_list\> + * \retval -errno otherwise + */ +int +cfs_expr_list_parse(char *str, int len, unsigned min, unsigned max, + struct cfs_expr_list **elpp) +{ + struct cfs_expr_list *expr_list; + struct cfs_range_expr *expr; + struct cfs_lstr src; + int rc; + + LIBCFS_ALLOC(expr_list, sizeof(*expr_list)); + if (!expr_list) + return -ENOMEM; + + src.ls_str = str; + src.ls_len = len; + + INIT_LIST_HEAD(&expr_list->el_exprs); + + if (src.ls_str[0] == '[' && + src.ls_str[src.ls_len - 1] == ']') { + src.ls_str++; + src.ls_len -= 2; + + rc = -EINVAL; + while (src.ls_str) { + struct cfs_lstr tok; + + if (!cfs_gettok(&src, ',', &tok)) { + rc = -EINVAL; + break; + } + + rc = cfs_range_expr_parse(&tok, min, max, 1, &expr); + if (rc != 0) + break; + + list_add_tail(&expr->re_link, &expr_list->el_exprs); + } + } else { + rc = cfs_range_expr_parse(&src, min, max, 0, &expr); + if (rc == 0) + list_add_tail(&expr->re_link, &expr_list->el_exprs); + } + + if (rc != 0) + cfs_expr_list_free(expr_list); + else + *elpp = expr_list; + + return rc; +} +EXPORT_SYMBOL(cfs_expr_list_parse); + +/** + * Frees cfs_expr_list structures of \a list. + * + * For each struct cfs_expr_list structure found on \a list it frees + * range_expr list attached to it and frees the cfs_expr_list itself. + * + * \retval none + */ +void +cfs_expr_list_free_list(struct list_head *list) +{ + struct cfs_expr_list *el; + + while (!list_empty(list)) { + el = list_entry(list->next, struct cfs_expr_list, el_link); + list_del(&el->el_link); + cfs_expr_list_free(el); + } +} +EXPORT_SYMBOL(cfs_expr_list_free_list); diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c new file mode 100644 index 000000000..389fb9eee --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c @@ -0,0 +1,1040 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * GPL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * + * Copyright (c) 2012, 2015 Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ * + * Author: liang@whamcloud.com + */ + +#define DEBUG_SUBSYSTEM S_LNET + +#include <linux/cpu.h> +#include <linux/sched.h> +#include "../../../include/linux/libcfs/libcfs.h" + +#ifdef CONFIG_SMP + +/** + * modparam for setting number of partitions + * + * 0 : estimate best value based on cores or NUMA nodes + * 1 : disable multiple partitions + * >1 : specify number of partitions + */ +static int cpu_npartitions; +module_param(cpu_npartitions, int, 0444); +MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions"); + +/** + * modparam for setting CPU partitions patterns: + * + * i.e: "0[0,1,2,3] 1[4,5,6,7]", number before bracket is CPU partition ID, + * number in bracket is processor ID (core or HT) + * + * i.e: "N 0[0,1] 1[2,3]" the first character 'N' means numbers in bracket + * are NUMA node ID, number before bracket is CPU partition ID. + * + * NB: If user specified cpu_pattern, cpu_npartitions will be ignored + */ +static char *cpu_pattern = ""; +module_param(cpu_pattern, charp, 0444); +MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern"); + +struct cfs_cpt_data { + /* serialize hotplug etc */ + spinlock_t cpt_lock; + /* reserved for hotplug */ + unsigned long cpt_version; + /* mutex to protect cpt_cpumask */ + struct mutex cpt_mutex; + /* scratch buffer for set/unset_node */ + cpumask_t *cpt_cpumask; +}; + +static struct cfs_cpt_data cpt_data; + +void +cfs_cpt_table_free(struct cfs_cpt_table *cptab) +{ + int i; + + if (cptab->ctb_cpu2cpt) { + LIBCFS_FREE(cptab->ctb_cpu2cpt, + num_possible_cpus() * + sizeof(cptab->ctb_cpu2cpt[0])); + } + + for (i = 0; cptab->ctb_parts && i < cptab->ctb_nparts; i++) { + struct cfs_cpu_partition *part = &cptab->ctb_parts[i]; + + if (part->cpt_nodemask) { + LIBCFS_FREE(part->cpt_nodemask, + sizeof(*part->cpt_nodemask)); + } + + if (part->cpt_cpumask) + LIBCFS_FREE(part->cpt_cpumask, cpumask_size()); + } + + if (cptab->ctb_parts) { + LIBCFS_FREE(cptab->ctb_parts, + cptab->ctb_nparts * sizeof(cptab->ctb_parts[0])); + } + + if (cptab->ctb_nodemask) + LIBCFS_FREE(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask)); + if (cptab->ctb_cpumask) + LIBCFS_FREE(cptab->ctb_cpumask, cpumask_size()); + + LIBCFS_FREE(cptab, sizeof(*cptab)); +} +EXPORT_SYMBOL(cfs_cpt_table_free); + +struct cfs_cpt_table * +cfs_cpt_table_alloc(unsigned int ncpt) +{ + struct cfs_cpt_table *cptab; + int i; + + LIBCFS_ALLOC(cptab, sizeof(*cptab)); + if (!cptab) + return NULL; + + cptab->ctb_nparts = ncpt; + + LIBCFS_ALLOC(cptab->ctb_cpumask, cpumask_size()); + LIBCFS_ALLOC(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask)); + + if (!cptab->ctb_cpumask || !cptab->ctb_nodemask) + goto failed; + + LIBCFS_ALLOC(cptab->ctb_cpu2cpt, + num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0])); + if (!cptab->ctb_cpu2cpt) + goto failed; + + memset(cptab->ctb_cpu2cpt, -1, + num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0])); + + LIBCFS_ALLOC(cptab->ctb_parts, ncpt * sizeof(cptab->ctb_parts[0])); + if (!cptab->ctb_parts) + goto failed; + + for (i = 0; i < ncpt; i++) { + struct cfs_cpu_partition *part = &cptab->ctb_parts[i]; + + LIBCFS_ALLOC(part->cpt_cpumask, cpumask_size()); + LIBCFS_ALLOC(part->cpt_nodemask, sizeof(*part->cpt_nodemask)); + if (!part->cpt_cpumask || !part->cpt_nodemask) + goto failed; + } + + spin_lock(&cpt_data.cpt_lock); + /* Reserved for hotplug */ + cptab->ctb_version = cpt_data.cpt_version; + spin_unlock(&cpt_data.cpt_lock); + + return cptab; + + failed: + cfs_cpt_table_free(cptab); + return NULL; +} +EXPORT_SYMBOL(cfs_cpt_table_alloc); + +int +cfs_cpt_table_print(struct 
cfs_cpt_table *cptab, char *buf, int len) +{ + char *tmp = buf; + int rc = 0; + int i; + int j; + + for (i = 0; i < cptab->ctb_nparts; i++) { + if (len > 0) { + rc = snprintf(tmp, len, "%d\t: ", i); + len -= rc; + } + + if (len <= 0) { + rc = -EFBIG; + goto out; + } + + tmp += rc; + for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) { + rc = snprintf(tmp, len, "%d ", j); + len -= rc; + if (len <= 0) { + rc = -EFBIG; + goto out; + } + tmp += rc; + } + + *tmp = '\n'; + tmp++; + len--; + } + + out: + if (rc < 0) + return rc; + + return tmp - buf; +} +EXPORT_SYMBOL(cfs_cpt_table_print); + +int +cfs_cpt_number(struct cfs_cpt_table *cptab) +{ + return cptab->ctb_nparts; +} +EXPORT_SYMBOL(cfs_cpt_number); + +int +cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt) +{ + LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); + + return cpt == CFS_CPT_ANY ? + cpumask_weight(cptab->ctb_cpumask) : + cpumask_weight(cptab->ctb_parts[cpt].cpt_cpumask); +} +EXPORT_SYMBOL(cfs_cpt_weight); + +int +cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt) +{ + LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); + + return cpt == CFS_CPT_ANY ? + cpumask_any_and(cptab->ctb_cpumask, + cpu_online_mask) < nr_cpu_ids : + cpumask_any_and(cptab->ctb_parts[cpt].cpt_cpumask, + cpu_online_mask) < nr_cpu_ids; +} +EXPORT_SYMBOL(cfs_cpt_online); + +cpumask_t * +cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt) +{ + LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); + + return cpt == CFS_CPT_ANY ? + cptab->ctb_cpumask : cptab->ctb_parts[cpt].cpt_cpumask; +} +EXPORT_SYMBOL(cfs_cpt_cpumask); + +nodemask_t * +cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt) +{ + LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); + + return cpt == CFS_CPT_ANY ? 
+	       cptab->ctb_nodemask : cptab->ctb_parts[cpt].cpt_nodemask;
+}
+EXPORT_SYMBOL(cfs_cpt_nodemask);
+
+int
+cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+	int node;
+
+	LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts);
+
+	if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_online(cpu)) {
+		CDEBUG(D_INFO, "CPU %d is invalid or it's offline\n", cpu);
+		return 0;
+	}
+
+	if (cptab->ctb_cpu2cpt[cpu] != -1) {
+		CDEBUG(D_INFO, "CPU %d is already in partition %d\n",
+		       cpu, cptab->ctb_cpu2cpt[cpu]);
+		return 0;
+	}
+
+	cptab->ctb_cpu2cpt[cpu] = cpt;
+
+	LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask));
+	LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
+
+	cpumask_set_cpu(cpu, cptab->ctb_cpumask);
+	cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
+
+	node = cpu_to_node(cpu);
+
+	/* first CPU of @node in this CPT table */
+	if (!node_isset(node, *cptab->ctb_nodemask))
+		node_set(node, *cptab->ctb_nodemask);
+
+	/* first CPU of @node in this partition */
+	if (!node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask))
+		node_set(node, *cptab->ctb_parts[cpt].cpt_nodemask);
+
+	return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_cpu);
+
+void
+cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+	int node;
+	int i;
+
+	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
+
+	if (cpu < 0 || cpu >= nr_cpu_ids) {
+		CDEBUG(D_INFO, "Invalid CPU id %d\n", cpu);
+		return;
+	}
+
+	if (cpt == CFS_CPT_ANY) {
+		/* caller doesn't know the partition ID */
+		cpt = cptab->ctb_cpu2cpt[cpu];
+		if (cpt < 0) { /* not set in this CPT-table */
+			CDEBUG(D_INFO, "Try to unset cpu %d which is not in CPT-table %p\n",
+			       cpu, cptab);
+			return;
+		}
+
+	} else if (cpt != cptab->ctb_cpu2cpt[cpu]) {
+		CDEBUG(D_INFO,
+		       "CPU %d is not in cpu-partition %d\n", cpu, cpt);
+		return;
+	}
+
+	LASSERT(cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
+	LASSERT(cpumask_test_cpu(cpu, cptab->ctb_cpumask));
+
+	cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
+	cpumask_clear_cpu(cpu, cptab->ctb_cpumask);
+	cptab->ctb_cpu2cpt[cpu] = -1;
+
+	node = cpu_to_node(cpu);
+
+	LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask));
+	LASSERT(node_isset(node, *cptab->ctb_nodemask));
+
+	for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) {
+		/* this CPT has other CPU belonging to this node? */
+		if (cpu_to_node(i) == node)
+			break;
+	}
+
+	if (i >= nr_cpu_ids)
+		node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask);
+
+	for_each_cpu(i, cptab->ctb_cpumask) {
+		/* this CPT-table has other CPU belonging to this node?
*/ + if (cpu_to_node(i) == node) + break; + } + + if (i >= nr_cpu_ids) + node_clear(node, *cptab->ctb_nodemask); +} +EXPORT_SYMBOL(cfs_cpt_unset_cpu); + +int +cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask) +{ + int i; + + if (cpumask_weight(mask) == 0 || + cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) { + CDEBUG(D_INFO, "No online CPU is found in the CPU mask for CPU partition %d\n", + cpt); + return 0; + } + + for_each_cpu(i, mask) { + if (!cfs_cpt_set_cpu(cptab, cpt, i)) + return 0; + } + + return 1; +} +EXPORT_SYMBOL(cfs_cpt_set_cpumask); + +void +cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask) +{ + int i; + + for_each_cpu(i, mask) + cfs_cpt_unset_cpu(cptab, cpt, i); +} +EXPORT_SYMBOL(cfs_cpt_unset_cpumask); + +int +cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node) +{ + cpumask_t *mask; + int rc; + + if (node < 0 || node >= MAX_NUMNODES) { + CDEBUG(D_INFO, + "Invalid NUMA id %d for CPU partition %d\n", node, cpt); + return 0; + } + + mutex_lock(&cpt_data.cpt_mutex); + + mask = cpt_data.cpt_cpumask; + cpumask_copy(mask, cpumask_of_node(node)); + + rc = cfs_cpt_set_cpumask(cptab, cpt, mask); + + mutex_unlock(&cpt_data.cpt_mutex); + + return rc; +} +EXPORT_SYMBOL(cfs_cpt_set_node); + +void +cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node) +{ + cpumask_t *mask; + + if (node < 0 || node >= MAX_NUMNODES) { + CDEBUG(D_INFO, + "Invalid NUMA id %d for CPU partition %d\n", node, cpt); + return; + } + + mutex_lock(&cpt_data.cpt_mutex); + + mask = cpt_data.cpt_cpumask; + cpumask_copy(mask, cpumask_of_node(node)); + + cfs_cpt_unset_cpumask(cptab, cpt, mask); + + mutex_unlock(&cpt_data.cpt_mutex); +} +EXPORT_SYMBOL(cfs_cpt_unset_node); + +int +cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask) +{ + int i; + + for_each_node_mask(i, *mask) { + if (!cfs_cpt_set_node(cptab, cpt, i)) + return 0; + } + + return 1; +} +EXPORT_SYMBOL(cfs_cpt_set_nodemask); + +void +cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask) +{ + int i; + + for_each_node_mask(i, *mask) + cfs_cpt_unset_node(cptab, cpt, i); +} +EXPORT_SYMBOL(cfs_cpt_unset_nodemask); + +void +cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt) +{ + int last; + int i; + + if (cpt == CFS_CPT_ANY) { + last = cptab->ctb_nparts - 1; + cpt = 0; + } else { + last = cpt; + } + + for (; cpt <= last; cpt++) { + for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) + cfs_cpt_unset_cpu(cptab, cpt, i); + } +} +EXPORT_SYMBOL(cfs_cpt_clear); + +int +cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt) +{ + nodemask_t *mask; + int weight; + int rotor; + int node; + + /* convert CPU partition ID to HW node id */ + + if (cpt < 0 || cpt >= cptab->ctb_nparts) { + mask = cptab->ctb_nodemask; + rotor = cptab->ctb_spread_rotor++; + } else { + mask = cptab->ctb_parts[cpt].cpt_nodemask; + rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++; + } + + weight = nodes_weight(*mask); + LASSERT(weight > 0); + + rotor %= weight; + + for_each_node_mask(node, *mask) { + if (rotor-- == 0) + return node; + } + + LBUG(); + return 0; +} +EXPORT_SYMBOL(cfs_cpt_spread_node); + +int +cfs_cpt_current(struct cfs_cpt_table *cptab, int remap) +{ + int cpu = smp_processor_id(); + int cpt = cptab->ctb_cpu2cpt[cpu]; + + if (cpt < 0) { + if (!remap) + return cpt; + + /* don't return negative value for safety of upper layer, + * instead we shadow the unknown cpu to a valid partition ID + */ + cpt = cpu % cptab->ctb_nparts; + } + + return cpt; +} 
+EXPORT_SYMBOL(cfs_cpt_current); + +int +cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu) +{ + LASSERT(cpu >= 0 && cpu < nr_cpu_ids); + + return cptab->ctb_cpu2cpt[cpu]; +} +EXPORT_SYMBOL(cfs_cpt_of_cpu); + +int +cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt) +{ + cpumask_t *cpumask; + nodemask_t *nodemask; + int rc; + int i; + + LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); + + if (cpt == CFS_CPT_ANY) { + cpumask = cptab->ctb_cpumask; + nodemask = cptab->ctb_nodemask; + } else { + cpumask = cptab->ctb_parts[cpt].cpt_cpumask; + nodemask = cptab->ctb_parts[cpt].cpt_nodemask; + } + + if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) { + CERROR("No online CPU found in CPU partition %d, did someone do CPU hotplug on system? You might need to reload Lustre modules to keep system working well.\n", + cpt); + return -EINVAL; + } + + for_each_online_cpu(i) { + if (cpumask_test_cpu(i, cpumask)) + continue; + + rc = set_cpus_allowed_ptr(current, cpumask); + set_mems_allowed(*nodemask); + if (rc == 0) + schedule(); /* switch to allowed CPU */ + + return rc; + } + + /* don't need to set affinity because all online CPUs are covered */ + return 0; +} +EXPORT_SYMBOL(cfs_cpt_bind); + +/** + * Choose max to \a number CPUs from \a node and set them in \a cpt. + * We always prefer to choose CPU in the same core/socket. + */ +static int +cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt, + cpumask_t *node, int number) +{ + cpumask_t *socket = NULL; + cpumask_t *core = NULL; + int rc = 0; + int cpu; + + LASSERT(number > 0); + + if (number >= cpumask_weight(node)) { + while (!cpumask_empty(node)) { + cpu = cpumask_first(node); + + rc = cfs_cpt_set_cpu(cptab, cpt, cpu); + if (!rc) + return -EINVAL; + cpumask_clear_cpu(cpu, node); + } + return 0; + } + + /* allocate scratch buffer */ + LIBCFS_ALLOC(socket, cpumask_size()); + LIBCFS_ALLOC(core, cpumask_size()); + if (!socket || !core) { + rc = -ENOMEM; + goto out; + } + + while (!cpumask_empty(node)) { + cpu = cpumask_first(node); + + /* get cpumask for cores in the same socket */ + cpumask_copy(socket, topology_core_cpumask(cpu)); + cpumask_and(socket, socket, node); + + LASSERT(!cpumask_empty(socket)); + + while (!cpumask_empty(socket)) { + int i; + + /* get cpumask for hts in the same core */ + cpumask_copy(core, topology_sibling_cpumask(cpu)); + cpumask_and(core, core, node); + + LASSERT(!cpumask_empty(core)); + + for_each_cpu(i, core) { + cpumask_clear_cpu(i, socket); + cpumask_clear_cpu(i, node); + + rc = cfs_cpt_set_cpu(cptab, cpt, i); + if (!rc) { + rc = -EINVAL; + goto out; + } + + if (--number == 0) + goto out; + } + cpu = cpumask_first(socket); + } + } + + out: + if (socket) + LIBCFS_FREE(socket, cpumask_size()); + if (core) + LIBCFS_FREE(core, cpumask_size()); + return rc; +} + +#define CPT_WEIGHT_MIN 4u + +static unsigned int +cfs_cpt_num_estimate(void) +{ + unsigned nnode = num_online_nodes(); + unsigned ncpu = num_online_cpus(); + unsigned ncpt; + + if (ncpu <= CPT_WEIGHT_MIN) { + ncpt = 1; + goto out; + } + + /* generate reasonable number of CPU partitions based on total number + * of CPUs, Preferred N should be power2 and match this condition: + * 2 * (N - 1)^2 < NCPUS <= 2 * N^2 + */ + for (ncpt = 2; ncpu > 2 * ncpt * ncpt; ncpt <<= 1) + ; + + if (ncpt <= nnode) { /* fat numa system */ + while (nnode > ncpt) + nnode >>= 1; + + } else { /* ncpt > nnode */ + while ((nnode << 1) <= ncpt) + nnode <<= 1; + } + + ncpt = nnode; + + out: +#if (BITS_PER_LONG == 32) + /* config many CPU partitions on 32-bit 
system could consume + * too much memory + */ + ncpt = min(2U, ncpt); +#endif + while (ncpu % ncpt != 0) + ncpt--; /* worst case is 1 */ + + return ncpt; +} + +static struct cfs_cpt_table * +cfs_cpt_table_create(int ncpt) +{ + struct cfs_cpt_table *cptab = NULL; + cpumask_t *mask = NULL; + int cpt = 0; + int num; + int rc; + int i; + + rc = cfs_cpt_num_estimate(); + if (ncpt <= 0) + ncpt = rc; + + if (ncpt > num_online_cpus() || ncpt > 4 * rc) { + CWARN("CPU partition number %d is larger than suggested value (%d), your system may have performance issue or run out of memory while under pressure\n", + ncpt, rc); + } + + if (num_online_cpus() % ncpt != 0) { + CERROR("CPU number %d is not multiple of cpu_npartition %d, please try different cpu_npartitions value or set pattern string by cpu_pattern=STRING\n", + (int)num_online_cpus(), ncpt); + goto failed; + } + + cptab = cfs_cpt_table_alloc(ncpt); + if (!cptab) { + CERROR("Failed to allocate CPU map(%d)\n", ncpt); + goto failed; + } + + num = num_online_cpus() / ncpt; + if (num == 0) { + CERROR("CPU changed while setting CPU partition\n"); + goto failed; + } + + LIBCFS_ALLOC(mask, cpumask_size()); + if (!mask) { + CERROR("Failed to allocate scratch cpumask\n"); + goto failed; + } + + for_each_online_node(i) { + cpumask_copy(mask, cpumask_of_node(i)); + + while (!cpumask_empty(mask)) { + struct cfs_cpu_partition *part; + int n; + + if (cpt >= ncpt) + goto failed; + + part = &cptab->ctb_parts[cpt]; + + n = num - cpumask_weight(part->cpt_cpumask); + LASSERT(n > 0); + + rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n); + if (rc < 0) + goto failed; + + LASSERT(num >= cpumask_weight(part->cpt_cpumask)); + if (num == cpumask_weight(part->cpt_cpumask)) + cpt++; + } + } + + if (cpt != ncpt || + num != cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)) { + CERROR("Expect %d(%d) CPU partitions but got %d(%d), CPU hotplug/unplug while setting?\n", + cptab->ctb_nparts, num, cpt, + cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)); + goto failed; + } + + LIBCFS_FREE(mask, cpumask_size()); + + return cptab; + + failed: + CERROR("Failed to setup CPU-partition-table with %d CPU-partitions, online HW nodes: %d, HW cpus: %d.\n", + ncpt, num_online_nodes(), num_online_cpus()); + + if (mask) + LIBCFS_FREE(mask, cpumask_size()); + + if (cptab) + cfs_cpt_table_free(cptab); + + return NULL; +} + +static struct cfs_cpt_table * +cfs_cpt_table_create_pattern(char *pattern) +{ + struct cfs_cpt_table *cptab; + char *str = pattern; + int node = 0; + int high; + int ncpt; + int c; + + for (ncpt = 0;; ncpt++) { /* quick scan bracket */ + str = strchr(str, '['); + if (!str) + break; + str++; + } + + str = cfs_trimwhite(pattern); + if (*str == 'n' || *str == 'N') { + pattern = str + 1; + node = 1; + } + + if (ncpt == 0 || + (node && ncpt > num_online_nodes()) || + (!node && ncpt > num_online_cpus())) { + CERROR("Invalid pattern %s, or too many partitions %d\n", + pattern, ncpt); + return NULL; + } + + high = node ? 
MAX_NUMNODES - 1 : nr_cpu_ids - 1; + + cptab = cfs_cpt_table_alloc(ncpt); + if (!cptab) { + CERROR("Failed to allocate cpu partition table\n"); + return NULL; + } + + for (str = cfs_trimwhite(pattern), c = 0;; c++) { + struct cfs_range_expr *range; + struct cfs_expr_list *el; + char *bracket = strchr(str, '['); + int cpt; + int rc; + int i; + int n; + + if (!bracket) { + if (*str != 0) { + CERROR("Invalid pattern %s\n", str); + goto failed; + } + if (c != ncpt) { + CERROR("expect %d partitions but found %d\n", + ncpt, c); + goto failed; + } + break; + } + + if (sscanf(str, "%d%n", &cpt, &n) < 1) { + CERROR("Invalid cpu pattern %s\n", str); + goto failed; + } + + if (cpt < 0 || cpt >= ncpt) { + CERROR("Invalid partition id %d, total partitions %d\n", + cpt, ncpt); + goto failed; + } + + if (cfs_cpt_weight(cptab, cpt) != 0) { + CERROR("Partition %d has already been set.\n", cpt); + goto failed; + } + + str = cfs_trimwhite(str + n); + if (str != bracket) { + CERROR("Invalid pattern %s\n", str); + goto failed; + } + + bracket = strchr(str, ']'); + if (!bracket) { + CERROR("missing right bracket for cpt %d, %s\n", + cpt, str); + goto failed; + } + + if (cfs_expr_list_parse(str, (bracket - str) + 1, + 0, high, &el) != 0) { + CERROR("Can't parse number range: %s\n", str); + goto failed; + } + + list_for_each_entry(range, &el->el_exprs, re_link) { + for (i = range->re_lo; i <= range->re_hi; i++) { + if ((i - range->re_lo) % range->re_stride != 0) + continue; + + rc = node ? cfs_cpt_set_node(cptab, cpt, i) : + cfs_cpt_set_cpu(cptab, cpt, i); + if (!rc) { + cfs_expr_list_free(el); + goto failed; + } + } + } + + cfs_expr_list_free(el); + + if (!cfs_cpt_online(cptab, cpt)) { + CERROR("No online CPU is found on partition %d\n", cpt); + goto failed; + } + + str = cfs_trimwhite(bracket + 1); + } + + return cptab; + + failed: + cfs_cpt_table_free(cptab); + return NULL; +} + +#ifdef CONFIG_HOTPLUG_CPU +static int +cfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + bool warn; + + switch (action) { + case CPU_DEAD: + case CPU_DEAD_FROZEN: + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + spin_lock(&cpt_data.cpt_lock); + cpt_data.cpt_version++; + spin_unlock(&cpt_data.cpt_lock); + /* Fall through */ + default: + if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) { + CDEBUG(D_INFO, "CPU changed [cpu %u action %lx]\n", + cpu, action); + break; + } + + mutex_lock(&cpt_data.cpt_mutex); + /* if all HTs in a core are offline, it may break affinity */ + cpumask_copy(cpt_data.cpt_cpumask, + topology_sibling_cpumask(cpu)); + warn = cpumask_any_and(cpt_data.cpt_cpumask, + cpu_online_mask) >= nr_cpu_ids; + mutex_unlock(&cpt_data.cpt_mutex); + CDEBUG(warn ? 
D_WARNING : D_INFO, + "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u action: %lx]\n", + cpu, action); + } + + return NOTIFY_OK; +} + +static struct notifier_block cfs_cpu_notifier = { + .notifier_call = cfs_cpu_notify, + .priority = 0 +}; + +#endif + +void +cfs_cpu_fini(void) +{ + if (cfs_cpt_table) + cfs_cpt_table_free(cfs_cpt_table); + +#ifdef CONFIG_HOTPLUG_CPU + unregister_hotcpu_notifier(&cfs_cpu_notifier); +#endif + if (cpt_data.cpt_cpumask) + LIBCFS_FREE(cpt_data.cpt_cpumask, cpumask_size()); +} + +int +cfs_cpu_init(void) +{ + LASSERT(!cfs_cpt_table); + + memset(&cpt_data, 0, sizeof(cpt_data)); + + LIBCFS_ALLOC(cpt_data.cpt_cpumask, cpumask_size()); + if (!cpt_data.cpt_cpumask) { + CERROR("Failed to allocate scratch buffer\n"); + return -1; + } + + spin_lock_init(&cpt_data.cpt_lock); + mutex_init(&cpt_data.cpt_mutex); + +#ifdef CONFIG_HOTPLUG_CPU + register_hotcpu_notifier(&cfs_cpu_notifier); +#endif + + if (*cpu_pattern != 0) { + cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern); + if (!cfs_cpt_table) { + CERROR("Failed to create cptab from pattern %s\n", + cpu_pattern); + goto failed; + } + + } else { + cfs_cpt_table = cfs_cpt_table_create(cpu_npartitions); + if (!cfs_cpt_table) { + CERROR("Failed to create ptable with npartitions %d\n", + cpu_npartitions); + goto failed; + } + } + + spin_lock(&cpt_data.cpt_lock); + if (cfs_cpt_table->ctb_version != cpt_data.cpt_version) { + spin_unlock(&cpt_data.cpt_lock); + CERROR("CPU hotplug/unplug during setup\n"); + goto failed; + } + spin_unlock(&cpt_data.cpt_lock); + + LCONSOLE(0, "HW CPU cores: %d, npartitions: %d\n", + num_online_cpus(), cfs_cpt_number(cfs_cpt_table)); + return 0; + + failed: + cfs_cpu_fini(); + return -1; +} + +#endif diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c new file mode 100644 index 000000000..db0572733 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c @@ -0,0 +1,137 @@ +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. + * + * GPL HEADER END + */ + +/* + * Copyright 2012 Xyratex Technology Limited + */ + +/* + * This is crypto api shash wrappers to zlib_adler32. 
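
The file below publishes zlib_adler32() to the rest of the kernel under the crypto API name "adler32" (driver "adler32-zlib"). Once cfs_crypto_adler32_register() has run, any caller can reach it through the generic synchronous shash interface; a hedged sketch, where only the wrapper function name is illustrative and the crypto calls are the standard ones:

	#include <crypto/hash.h>

	/* Sketch: one-shot adler32 of a flat buffer via the shash API. */
	static int adler32_of(const u8 *data, unsigned int len, u32 *out)
	{
		struct crypto_shash *tfm = crypto_alloc_shash("adler32", 0, 0);
		int rc;

		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		{
			SHASH_DESC_ON_STACK(desc, tfm);

			desc->tfm = tfm;
			desc->flags = 0;	/* synchronous, may not sleep */
			rc = crypto_shash_digest(desc, data, len, (u8 *)out);
		}

		crypto_free_shash(tfm);
		return rc;
	}
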
+ */ + +#include <linux/module.h> +#include <linux/zutil.h> +#include <crypto/internal/hash.h> +#include "linux-crypto.h" + +#define CHKSUM_BLOCK_SIZE 1 +#define CHKSUM_DIGEST_SIZE 4 + +static int adler32_cra_init(struct crypto_tfm *tfm) +{ + u32 *key = crypto_tfm_ctx(tfm); + + *key = 1; + + return 0; +} + +static int adler32_setkey(struct crypto_shash *hash, const u8 *key, + unsigned int keylen) +{ + u32 *mctx = crypto_shash_ctx(hash); + + if (keylen != sizeof(u32)) { + crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + *mctx = *(u32 *)key; + return 0; +} + +static int adler32_init(struct shash_desc *desc) +{ + u32 *mctx = crypto_shash_ctx(desc->tfm); + u32 *cksump = shash_desc_ctx(desc); + + *cksump = *mctx; + + return 0; +} + +static int adler32_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + u32 *cksump = shash_desc_ctx(desc); + + *cksump = zlib_adler32(*cksump, data, len); + return 0; +} + +static int __adler32_finup(u32 *cksump, const u8 *data, unsigned int len, + u8 *out) +{ + *(u32 *)out = zlib_adler32(*cksump, data, len); + return 0; +} + +static int adler32_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __adler32_finup(shash_desc_ctx(desc), data, len, out); +} + +static int adler32_final(struct shash_desc *desc, u8 *out) +{ + u32 *cksump = shash_desc_ctx(desc); + + *(u32 *)out = *cksump; + return 0; +} + +static int adler32_digest(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __adler32_finup(crypto_shash_ctx(desc->tfm), data, len, + out); +} + +static struct shash_alg alg = { + .setkey = adler32_setkey, + .init = adler32_init, + .update = adler32_update, + .final = adler32_final, + .finup = adler32_finup, + .digest = adler32_digest, + .descsize = sizeof(u32), + .digestsize = CHKSUM_DIGEST_SIZE, + .base = { + .cra_name = "adler32", + .cra_driver_name = "adler32-zlib", + .cra_priority = 100, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_ctxsize = sizeof(u32), + .cra_module = THIS_MODULE, + .cra_init = adler32_cra_init, + } +}; + +int cfs_crypto_adler32_register(void) +{ + return crypto_register_shash(&alg); +} + +void cfs_crypto_adler32_unregister(void) +{ + crypto_unregister_shash(&alg); +} diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c new file mode 100644 index 000000000..8c9377ed8 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c @@ -0,0 +1,297 @@ +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. 
+ * + * GPL HEADER END + */ + +/* + * Copyright 2012 Xyratex Technology Limited + * + * Copyright (c) 2012, Intel Corporation. + */ + +#include <crypto/hash.h> +#include <linux/scatterlist.h> +#include "../../../include/linux/libcfs/libcfs.h" +#include "linux-crypto.h" +/** + * Array of hash algorithm speed in MByte per second + */ +static int cfs_crypto_hash_speeds[CFS_HASH_ALG_MAX]; + +static int cfs_crypto_hash_alloc(unsigned char alg_id, + const struct cfs_crypto_hash_type **type, + struct ahash_request **req, + unsigned char *key, + unsigned int key_len) +{ + struct crypto_ahash *tfm; + int err = 0; + + *type = cfs_crypto_hash_type(alg_id); + + if (!*type) { + CWARN("Unsupported hash algorithm id = %d, max id is %d\n", + alg_id, CFS_HASH_ALG_MAX); + return -EINVAL; + } + tfm = crypto_alloc_ahash((*type)->cht_name, 0, CRYPTO_ALG_ASYNC); + + if (IS_ERR(tfm)) { + CDEBUG(D_INFO, "Failed to alloc crypto hash %s\n", + (*type)->cht_name); + return PTR_ERR(tfm); + } + + *req = ahash_request_alloc(tfm, GFP_KERNEL); + if (!*req) { + CDEBUG(D_INFO, "Failed to alloc ahash_request for %s\n", + (*type)->cht_name); + crypto_free_ahash(tfm); + return -ENOMEM; + } + + ahash_request_set_callback(*req, 0, NULL, NULL); + + /** Shash have different logic for initialization then digest + * shash: crypto_hash_setkey, crypto_hash_init + * digest: crypto_digest_init, crypto_digest_setkey + * Skip this function for digest, because we use shash logic at + * cfs_crypto_hash_alloc. + */ + if (key) + err = crypto_ahash_setkey(tfm, key, key_len); + else if ((*type)->cht_key != 0) + err = crypto_ahash_setkey(tfm, + (unsigned char *)&((*type)->cht_key), + (*type)->cht_size); + + if (err != 0) { + crypto_free_ahash(tfm); + return err; + } + + CDEBUG(D_INFO, "Using crypto hash: %s (%s) speed %d MB/s\n", + crypto_ahash_alg_name(tfm), crypto_ahash_driver_name(tfm), + cfs_crypto_hash_speeds[alg_id]); + + err = crypto_ahash_init(*req); + if (err) { + ahash_request_free(*req); + crypto_free_ahash(tfm); + } + return err; +} + +int cfs_crypto_hash_digest(unsigned char alg_id, + const void *buf, unsigned int buf_len, + unsigned char *key, unsigned int key_len, + unsigned char *hash, unsigned int *hash_len) +{ + struct scatterlist sl; + struct ahash_request *req; + int err; + const struct cfs_crypto_hash_type *type; + + if (!buf || buf_len == 0 || !hash_len) + return -EINVAL; + + err = cfs_crypto_hash_alloc(alg_id, &type, &req, key, key_len); + if (err != 0) + return err; + + if (!hash || *hash_len < type->cht_size) { + *hash_len = type->cht_size; + crypto_free_ahash(crypto_ahash_reqtfm(req)); + ahash_request_free(req); + return -ENOSPC; + } + sg_init_one(&sl, buf, buf_len); + + ahash_request_set_crypt(req, &sl, hash, sl.length); + err = crypto_ahash_digest(req); + crypto_free_ahash(crypto_ahash_reqtfm(req)); + ahash_request_free(req); + + return err; +} +EXPORT_SYMBOL(cfs_crypto_hash_digest); + +struct cfs_crypto_hash_desc * + cfs_crypto_hash_init(unsigned char alg_id, + unsigned char *key, unsigned int key_len) +{ + struct ahash_request *req; + int err; + const struct cfs_crypto_hash_type *type; + + err = cfs_crypto_hash_alloc(alg_id, &type, &req, key, key_len); + + if (err) + return ERR_PTR(err); + return (struct cfs_crypto_hash_desc *)req; +} +EXPORT_SYMBOL(cfs_crypto_hash_init); + +int cfs_crypto_hash_update_page(struct cfs_crypto_hash_desc *hdesc, + struct page *page, unsigned int offset, + unsigned int len) +{ + struct ahash_request *req = (void *)hdesc; + struct scatterlist sl; + + sg_init_table(&sl, 1); + 
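	/* sg_set_page() below wants the offset within the page: CFS_PAGE_MASK
	 * covers the page-aligned bits, so "offset & ~CFS_PAGE_MASK" keeps
	 * only the in-page part of the caller's offset */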
sg_set_page(&sl, page, len, offset & ~CFS_PAGE_MASK); + + ahash_request_set_crypt(req, &sl, NULL, sl.length); + return crypto_ahash_update(req); +} +EXPORT_SYMBOL(cfs_crypto_hash_update_page); + +int cfs_crypto_hash_update(struct cfs_crypto_hash_desc *hdesc, + const void *buf, unsigned int buf_len) +{ + struct ahash_request *req = (void *)hdesc; + struct scatterlist sl; + + sg_init_one(&sl, buf, buf_len); + + ahash_request_set_crypt(req, &sl, NULL, sl.length); + return crypto_ahash_update(req); +} +EXPORT_SYMBOL(cfs_crypto_hash_update); + +/* If the hash_len pointer is NULL, just destroy the descriptor. */ +int cfs_crypto_hash_final(struct cfs_crypto_hash_desc *hdesc, + unsigned char *hash, unsigned int *hash_len) +{ + int err; + struct ahash_request *req = (void *)hdesc; + int size = crypto_ahash_digestsize(crypto_ahash_reqtfm(req)); + + if (!hash_len) { + crypto_free_ahash(crypto_ahash_reqtfm(req)); + ahash_request_free(req); + return 0; + } + if (!hash || *hash_len < size) { + *hash_len = size; + return -ENOSPC; + } + ahash_request_set_crypt(req, NULL, hash, 0); + err = crypto_ahash_final(req); + + if (err < 0) { + /* the caller may be able to fix the error */ + return err; + } + crypto_free_ahash(crypto_ahash_reqtfm(req)); + ahash_request_free(req); + return err; +} +EXPORT_SYMBOL(cfs_crypto_hash_final); + +static void cfs_crypto_performance_test(unsigned char alg_id, + const unsigned char *buf, + unsigned int buf_len) +{ + unsigned long start, end; + int bcount, err = 0; + int sec = 1; /* do test only 1 sec */ + unsigned char hash[64]; + unsigned int hash_len = 64; + + for (start = jiffies, end = start + sec * HZ, bcount = 0; + time_before(jiffies, end); bcount++) { + err = cfs_crypto_hash_digest(alg_id, buf, buf_len, NULL, 0, + hash, &hash_len); + if (err) + break; + } + end = jiffies; + + if (err) { + cfs_crypto_hash_speeds[alg_id] = -1; + CDEBUG(D_INFO, "Crypto hash algorithm %s, err = %d\n", + cfs_crypto_hash_name(alg_id), err); + } else { + unsigned long tmp; + + tmp = ((bcount * buf_len / jiffies_to_msecs(end - start)) * + 1000) / (1024 * 1024); + cfs_crypto_hash_speeds[alg_id] = (int)tmp; + } + CDEBUG(D_INFO, "Crypto hash algorithm %s speed = %d MB/s\n", + cfs_crypto_hash_name(alg_id), cfs_crypto_hash_speeds[alg_id]); +} + +int cfs_crypto_hash_speed(unsigned char hash_alg) +{ + if (hash_alg < CFS_HASH_ALG_MAX) + return cfs_crypto_hash_speeds[hash_alg]; + return -1; +} +EXPORT_SYMBOL(cfs_crypto_hash_speed);
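
The one-shot cfs_crypto_hash_digest() and the init/update/final triple above are two entry points into the same ahash machinery. A sketch of a streaming caller (the helper name and two-buffer shape are illustrative; the algorithm id comes from the libcfs crypto header). Note the error path: calling cfs_crypto_hash_final() with a NULL hash_len pointer only tears the context down:

	/* Sketch: hash two buffers through one context. */
	static int hash_two_bufs(unsigned char alg,
				 const void *a, unsigned int alen,
				 const void *b, unsigned int blen,
				 unsigned char *out, unsigned int *outlen)
	{
		struct cfs_crypto_hash_desc *desc;
		int rc;

		desc = cfs_crypto_hash_init(alg, NULL, 0);	/* default key */
		if (IS_ERR(desc))
			return PTR_ERR(desc);

		rc = cfs_crypto_hash_update(desc, a, alen);
		if (!rc)
			rc = cfs_crypto_hash_update(desc, b, blen);
		if (rc) {
			cfs_crypto_hash_final(desc, NULL, NULL);	/* just free */
			return rc;
		}
		/* *outlen must be at least the digest size, else -ENOSPC */
		return cfs_crypto_hash_final(desc, out, outlen);
	}
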
+ +/** + * Do performance test for all hash algorithms. + */ +static int cfs_crypto_test_hashes(void) +{ + unsigned char i; + unsigned char *data; + unsigned int j; + /* Data block size for testing hash; the maximum + * kmalloc size on a 2.6.18 kernel is 128K + */ + unsigned int data_len = 1 * 128 * 1024; + + data = kmalloc(data_len, GFP_KERNEL); + if (!data) + return -ENOMEM; + + for (j = 0; j < data_len; j++) + data[j] = j & 0xff; + + for (i = 0; i < CFS_HASH_ALG_MAX; i++) + cfs_crypto_performance_test(i, data, data_len); + + kfree(data); + return 0; +} + +static int adler32; + +int cfs_crypto_register(void) +{ + request_module("crc32c"); + + adler32 = cfs_crypto_adler32_register(); + + /* check all algorithms and do performance test */ + cfs_crypto_test_hashes(); + return 0; +} + +void cfs_crypto_unregister(void) +{ + if (adler32 == 0) + cfs_crypto_adler32_unregister(); +} diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h new file mode 100644 index 000000000..18e8cd4d8 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h @@ -0,0 +1,29 @@ + /* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. + * + * GPL HEADER END + */ + +/** + * Functions to start/stop the adler32 shash algorithm. + */ +int cfs_crypto_adler32_register(void); +void cfs_crypto_adler32_unregister(void); diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-curproc.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-curproc.c new file mode 100644 index 000000000..13d31e8a9 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-curproc.c @@ -0,0 +1,111 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2015, Intel Corporation. 
+ */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/libcfs/linux/linux-curproc.c + * + * Lustre curproc API implementation for Linux kernel + * + * Author: Nikita Danilov <nikita@clusterfs.com> + */ + +#include <linux/sched.h> +#include <linux/fs_struct.h> + +#include <linux/compat.h> +#include <linux/thread_info.h> + +#define DEBUG_SUBSYSTEM S_LNET + +#include "../../../include/linux/libcfs/libcfs.h" + +/* + * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h) + * for Linux kernel. + */ + +void cfs_cap_raise(cfs_cap_t cap) +{ + struct cred *cred; + + cred = prepare_creds(); + if (cred) { + cap_raise(cred->cap_effective, cap); + commit_creds(cred); + } +} +EXPORT_SYMBOL(cfs_cap_raise); + +void cfs_cap_lower(cfs_cap_t cap) +{ + struct cred *cred; + + cred = prepare_creds(); + if (cred) { + cap_lower(cred->cap_effective, cap); + commit_creds(cred); + } +} +EXPORT_SYMBOL(cfs_cap_lower); + +int cfs_cap_raised(cfs_cap_t cap) +{ + return cap_raised(current_cap(), cap); +} +EXPORT_SYMBOL(cfs_cap_raised); + +static void cfs_kernel_cap_pack(kernel_cap_t kcap, cfs_cap_t *cap) +{ + /* XXX lost high byte */ + *cap = kcap.cap[0]; +} + +cfs_cap_t cfs_curproc_cap_pack(void) +{ + cfs_cap_t cap; + + cfs_kernel_cap_pack(current_cap(), &cap); + return cap; +} +EXPORT_SYMBOL(cfs_curproc_cap_pack); + +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c new file mode 100644 index 000000000..638e4b33d --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c @@ -0,0 +1,200 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ * + * libcfs/libcfs/linux/linux-debug.c + * + * Author: Phil Schwan <phil@clusterfs.com> + */ + +#include <linux/module.h> +#include <linux/kmod.h> +#include <linux/notifier.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/unistd.h> +#include <linux/interrupt.h> +#include <linux/completion.h> +#include <linux/fs.h> +#include <linux/uaccess.h> +#include <linux/miscdevice.h> + +# define DEBUG_SUBSYSTEM S_LNET + +#include "../../../include/linux/libcfs/libcfs.h" + +#include "../tracefile.h" + +#include <linux/kallsyms.h> + +char lnet_upcall[1024] = "/usr/lib/lustre/lnet_upcall"; +char lnet_debug_log_upcall[1024] = "/usr/lib/lustre/lnet_debug_log_upcall"; + +/** + * Upcall function once a Lustre log has been dumped. + * + * \param file path of the dumped log + */ +void libcfs_run_debug_log_upcall(char *file) +{ + char *argv[3]; + int rc; + char *envp[] = { + "HOME=/", + "PATH=/sbin:/bin:/usr/sbin:/usr/bin", + NULL}; + + argv[0] = lnet_debug_log_upcall; + + LASSERTF(file, "called on a null filename\n"); + argv[1] = file; /* only need to pass the path of the file */ + + argv[2] = NULL; + + rc = call_usermodehelper(argv[0], argv, envp, 1); + if (rc < 0 && rc != -ENOENT) { + CERROR("Error %d invoking LNET debug log upcall %s %s; check /sys/kernel/debug/lnet/debug_log_upcall\n", + rc, argv[0], argv[1]); + } else { + CDEBUG(D_HA, "Invoked LNET debug log upcall %s %s\n", + argv[0], argv[1]); + } +} + +void libcfs_run_upcall(char **argv) +{ + int rc; + int argc; + char *envp[] = { + "HOME=/", + "PATH=/sbin:/bin:/usr/sbin:/usr/bin", + NULL}; + + argv[0] = lnet_upcall; + argc = 1; + while (argv[argc]) + argc++; + + LASSERT(argc >= 2); + + rc = call_usermodehelper(argv[0], argv, envp, 1); + if (rc < 0 && rc != -ENOENT) { + CERROR("Error %d invoking LNET upcall %s %s%s%s%s%s%s%s%s; check /sys/kernel/debug/lnet/upcall\n", + rc, argv[0], argv[1], + argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], + argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], + argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], + argc < 6 ? "" : ",..."); + } else { + CDEBUG(D_HA, "Invoked LNET upcall %s %s%s%s%s%s%s%s%s\n", + argv[0], argv[1], + argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], + argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], + argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], + argc < 6 ? 
"" : ",..."); + } +} + +void libcfs_run_lbug_upcall(struct libcfs_debug_msg_data *msgdata) +{ + char *argv[6]; + char buf[32]; + + snprintf(buf, sizeof(buf), "%d", msgdata->msg_line); + + argv[1] = "LBUG"; + argv[2] = (char *)msgdata->msg_file; + argv[3] = (char *)msgdata->msg_fn; + argv[4] = buf; + argv[5] = NULL; + + libcfs_run_upcall(argv); +} +EXPORT_SYMBOL(libcfs_run_lbug_upcall); + +/* coverity[+kill] */ +void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msgdata) +{ + libcfs_catastrophe = 1; + libcfs_debug_msg(msgdata, "LBUG\n"); + + if (in_interrupt()) { + panic("LBUG in interrupt.\n"); + /* not reached */ + } + + dump_stack(); + if (!libcfs_panic_on_lbug) + libcfs_debug_dumplog(); + libcfs_run_lbug_upcall(msgdata); + if (libcfs_panic_on_lbug) + panic("LBUG"); + set_task_state(current, TASK_UNINTERRUPTIBLE); + while (1) + schedule(); +} +EXPORT_SYMBOL(lbug_with_loc); + +static int panic_notifier(struct notifier_block *self, unsigned long unused1, + void *unused2) +{ + if (libcfs_panic_in_progress) + return 0; + + libcfs_panic_in_progress = 1; + mb(); + + return 0; +} + +static struct notifier_block libcfs_panic_notifier = { + .notifier_call = panic_notifier, + .next = NULL, + .priority = 10000, +}; + +void libcfs_register_panic_notifier(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &libcfs_panic_notifier); +} + +void libcfs_unregister_panic_notifier(void) +{ + atomic_notifier_chain_unregister(&panic_notifier_list, + &libcfs_panic_notifier); +} diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c new file mode 100644 index 000000000..86f32ffc5 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c @@ -0,0 +1,59 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + */ +/* + * This file creates a memory allocation primitive for Lustre, that + * allows to fallback to vmalloc allocations should regular kernel allocations + * fail due to size or system memory fragmentation. + * + * Author: Oleg Drokin <green@linuxhacker.ru> + * + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Seagate Technology. 
+ */ +#include <linux/slab.h> +#include <linux/vmalloc.h> + +#include "../../../include/linux/libcfs/libcfs.h" + +void *libcfs_kvzalloc(size_t size, gfp_t flags) +{ + void *ret; + + ret = kzalloc(size, flags | __GFP_NOWARN); + if (!ret) + ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); + return ret; +} +EXPORT_SYMBOL(libcfs_kvzalloc); + +void *libcfs_kvzalloc_cpt(struct cfs_cpt_table *cptab, int cpt, size_t size, + gfp_t flags) +{ + void *ret; + + ret = kzalloc_node(size, flags | __GFP_NOWARN, + cfs_cpt_spread_node(cptab, cpt)); + if (!ret) { + WARN_ON(!(flags & (__GFP_FS | __GFP_HIGH))); + ret = vmalloc_node(size, cfs_cpt_spread_node(cptab, cpt)); + } + + return ret; +} +EXPORT_SYMBOL(libcfs_kvzalloc_cpt); diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c new file mode 100644 index 000000000..ebc60ac9b --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c @@ -0,0 +1,159 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
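
The first helper in the file below, libcfs_ioctl_data_adjust(), rebuilds the two inline-buffer pointers from the flat ioc_bulk area after the structure has crossed the user/kernel boundary: buffer 1 starts at ioc_bulk[0] and buffer 2 follows at the next cfs_size_round() boundary. A hedged sketch of the packing a sender would have to perform, with illustrative variable names:

	/* Sketch: lay out two inline buffers the way the kernel unpacks them. */
	char *p = data->ioc_bulk;

	memcpy(p, buf1, data->ioc_inllen1);
	p += cfs_size_round(data->ioc_inllen1);	/* pad to the rounding boundary */
	memcpy(p, buf2, data->ioc_inllen2);
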
+ */ + +#define DEBUG_SUBSYSTEM S_LNET + +#include "../../../include/linux/libcfs/libcfs.h" + +#define LNET_MINOR 240 + +int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data) +{ + if (libcfs_ioctl_is_invalid(data)) { + CERROR("LNET: ioctl not correctly formatted\n"); + return -EINVAL; + } + + if (data->ioc_inllen1) + data->ioc_inlbuf1 = &data->ioc_bulk[0]; + + if (data->ioc_inllen2) + data->ioc_inlbuf2 = &data->ioc_bulk[0] + + cfs_size_round(data->ioc_inllen1); + + return 0; +} + +int libcfs_ioctl_getdata_len(const struct libcfs_ioctl_hdr __user *arg, + __u32 *len) +{ + struct libcfs_ioctl_hdr hdr; + + if (copy_from_user(&hdr, arg, sizeof(hdr))) + return -EFAULT; + + if (hdr.ioc_version != LIBCFS_IOCTL_VERSION && + hdr.ioc_version != LIBCFS_IOCTL_VERSION2) { + CERROR("LNET: version mismatch expected %#x, got %#x\n", + LIBCFS_IOCTL_VERSION, hdr.ioc_version); + return -EINVAL; + } + + *len = hdr.ioc_len; + + return 0; +} + +int libcfs_ioctl_popdata(void __user *arg, void *data, int size) +{ + if (copy_to_user(arg, data, size)) + return -EFAULT; + return 0; +} + +static int +libcfs_psdev_open(struct inode *inode, struct file *file) +{ + int rc = 0; + + if (!inode) + return -EINVAL; + if (libcfs_psdev_ops.p_open) + rc = libcfs_psdev_ops.p_open(0, NULL); + else + return -EPERM; + return rc; +} + +/* called when closing /dev/device */ +static int +libcfs_psdev_release(struct inode *inode, struct file *file) +{ + int rc = 0; + + if (!inode) + return -EINVAL; + if (libcfs_psdev_ops.p_close) + rc = libcfs_psdev_ops.p_close(0, NULL); + else + rc = -EPERM; + return rc; +} + +static long libcfs_ioctl(struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct cfs_psdev_file pfile; + int rc = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (_IOC_TYPE(cmd) != IOC_LIBCFS_TYPE || + _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR || + _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR) { + CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n", + _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)); + return -EINVAL; + } + + /* Handle platform-dependent IOC requests */ + switch (cmd) { + case IOC_LIBCFS_PANIC: + if (!capable(CFS_CAP_SYS_BOOT)) + return -EPERM; + panic("debugctl-invoked panic"); + return 0; + } + + if (libcfs_psdev_ops.p_ioctl) + rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void __user *)arg); + else + rc = -EPERM; + return rc; +} + +static const struct file_operations libcfs_fops = { + .unlocked_ioctl = libcfs_ioctl, + .open = libcfs_psdev_open, + .release = libcfs_psdev_release, +}; + +struct miscdevice libcfs_dev = { + .minor = LNET_MINOR, + .name = "lnet", + .fops = &libcfs_fops, +}; diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c new file mode 100644 index 000000000..890844602 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c @@ -0,0 +1,147 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#define DEBUG_SUBSYSTEM S_LNET +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/fs_struct.h> +#include <linux/sched.h> + +#include "../../../include/linux/libcfs/libcfs.h" + +#if defined(CONFIG_KGDB) +#include <linux/kgdb.h> +#endif + +/** + * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively + * waiting threads, which is not always desirable because all threads are + * woken again and again, even when only a few of them need to be active + * most of the time. This is bad for performance because the cache can be + * polluted by different threads. + * + * A LIFO list resolves this problem because by default we always wake up + * the most recently active thread. + * + * NB: please don't mix non-exclusive and exclusive waits on the same + * waitq if add_wait_queue_exclusive_head is used. + */ +void +add_wait_queue_exclusive_head(wait_queue_head_t *waitq, wait_queue_t *link) +{ + unsigned long flags; + + spin_lock_irqsave(&waitq->lock, flags); + __add_wait_queue_exclusive(waitq, link); + spin_unlock_irqrestore(&waitq->lock, flags); +} +EXPORT_SYMBOL(add_wait_queue_exclusive_head); + +sigset_t +cfs_block_allsigs(void) +{ + unsigned long flags; + sigset_t old; + + spin_lock_irqsave(&current->sighand->siglock, flags); + old = current->blocked; + sigfillset(&current->blocked); + recalc_sigpending(); + spin_unlock_irqrestore(&current->sighand->siglock, flags); + + return old; +} +EXPORT_SYMBOL(cfs_block_allsigs); + +sigset_t cfs_block_sigs(unsigned long sigs) +{ + unsigned long flags; + sigset_t old; + + spin_lock_irqsave(&current->sighand->siglock, flags); + old = current->blocked; + sigaddsetmask(&current->blocked, sigs); + recalc_sigpending(); + spin_unlock_irqrestore(&current->sighand->siglock, flags); + return old; +} +EXPORT_SYMBOL(cfs_block_sigs); + +/* Block all signals except for the @sigs */ +sigset_t cfs_block_sigsinv(unsigned long sigs) +{ + unsigned long flags; + sigset_t old; + + spin_lock_irqsave(&current->sighand->siglock, flags); + old = current->blocked; + sigaddsetmask(&current->blocked, ~sigs); + recalc_sigpending(); + spin_unlock_irqrestore(&current->sighand->siglock, flags); + + return old; +} +EXPORT_SYMBOL(cfs_block_sigsinv); + +void +cfs_restore_sigs(sigset_t old) +{ + unsigned long flags; + + spin_lock_irqsave(&current->sighand->siglock, flags); + current->blocked = old; + recalc_sigpending(); + spin_unlock_irqrestore(&current->sighand->siglock, flags); +} +EXPORT_SYMBOL(cfs_restore_sigs); + +int +cfs_signal_pending(void) +{ + return signal_pending(current); +} +EXPORT_SYMBOL(cfs_signal_pending); + +void +cfs_clear_sigpending(void) +{ + unsigned long flags; + + spin_lock_irqsave(&current->sighand->siglock, flags); + clear_tsk_thread_flag(current, TIF_SIGPENDING); + spin_unlock_irqrestore(&current->sighand->siglock, flags); +} +EXPORT_SYMBOL(cfs_clear_sigpending); diff --git 
a/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c new file mode 100644 index 000000000..91c2ae8f9 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c @@ -0,0 +1,259 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#define DEBUG_SUBSYSTEM S_LNET +#define LUSTRE_TRACEFILE_PRIVATE + +#include "../../../include/linux/libcfs/libcfs.h" +#include "../tracefile.h" + +/* percents to share the total debug memory for each type */ +static unsigned int pages_factor[CFS_TCD_TYPE_MAX] = { + 80, /* 80% pages for CFS_TCD_TYPE_PROC */ + 10, /* 10% pages for CFS_TCD_TYPE_SOFTIRQ */ + 10 /* 10% pages for CFS_TCD_TYPE_IRQ */ +}; + +char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX]; + +static DECLARE_RWSEM(cfs_tracefile_sem); + +int cfs_tracefile_init_arch(void) +{ + int i; + int j; + struct cfs_trace_cpu_data *tcd; + + /* initialize trace_data */ + memset(cfs_trace_data, 0, sizeof(cfs_trace_data)); + for (i = 0; i < CFS_TCD_TYPE_MAX; i++) { + cfs_trace_data[i] = + kmalloc(sizeof(union cfs_trace_data_union) * + num_possible_cpus(), GFP_KERNEL); + if (!cfs_trace_data[i]) + goto out; + } + + /* arch related info initialized */ + cfs_tcd_for_each(tcd, i, j) { + spin_lock_init(&tcd->tcd_lock); + tcd->tcd_pages_factor = pages_factor[i]; + tcd->tcd_type = i; + tcd->tcd_cpu = j; + } + + for (i = 0; i < num_possible_cpus(); i++) + for (j = 0; j < 3; j++) { + cfs_trace_console_buffers[i][j] = + kmalloc(CFS_TRACE_CONSOLE_BUFFER_SIZE, + GFP_KERNEL); + + if (!cfs_trace_console_buffers[i][j]) + goto out; + } + + return 0; + +out: + cfs_tracefile_fini_arch(); + printk(KERN_ERR "lnet: Not enough memory\n"); + return -ENOMEM; +} + +void cfs_tracefile_fini_arch(void) +{ + int i; + int j; + + for (i = 0; i < num_possible_cpus(); i++) + for (j = 0; j < 3; j++) { + kfree(cfs_trace_console_buffers[i][j]); + cfs_trace_console_buffers[i][j] = NULL; + } + + for (i = 0; cfs_trace_data[i]; i++) { + kfree(cfs_trace_data[i]); + cfs_trace_data[i] = NULL; + } +} + +void cfs_tracefile_read_lock(void) +{ + down_read(&cfs_tracefile_sem); +} + +void cfs_tracefile_read_unlock(void) +{ + up_read(&cfs_tracefile_sem); +} + +void cfs_tracefile_write_lock(void) +{ + 
down_write(&cfs_tracefile_sem); +} + +void cfs_tracefile_write_unlock(void) +{ + up_write(&cfs_tracefile_sem); +} + +enum cfs_trace_buf_type cfs_trace_buf_idx_get(void) +{ + if (in_irq()) + return CFS_TCD_TYPE_IRQ; + if (in_softirq()) + return CFS_TCD_TYPE_SOFTIRQ; + return CFS_TCD_TYPE_PROC; +} + +/* + * The walking argument indicates that the locking comes from the + * all-tcd-types iterator, in which case we must take the lock with local + * irqs disabled to avoid deadlocks with other interrupt locks that might + * be taken. See LU-1311 for details. + */ +int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking) + __acquires(&tcd->tcd_lock) +{ + __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX); + if (tcd->tcd_type == CFS_TCD_TYPE_IRQ) + spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags); + else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ) + spin_lock_bh(&tcd->tcd_lock); + else if (unlikely(walking)) + spin_lock_irq(&tcd->tcd_lock); + else + spin_lock(&tcd->tcd_lock); + return 1; +} + +void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking) + __releases(&tcd->tcd_lock) +{ + __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX); + if (tcd->tcd_type == CFS_TCD_TYPE_IRQ) + spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags); + else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ) + spin_unlock_bh(&tcd->tcd_lock); + else if (unlikely(walking)) + spin_unlock_irq(&tcd->tcd_lock); + else + spin_unlock(&tcd->tcd_lock); +} + +void +cfs_set_ptldebug_header(struct ptldebug_header *header, + struct libcfs_debug_msg_data *msgdata, + unsigned long stack) +{ + struct timespec64 ts; + + ktime_get_real_ts64(&ts); + + header->ph_subsys = msgdata->msg_subsys; + header->ph_mask = msgdata->msg_mask; + header->ph_cpu_id = smp_processor_id(); + header->ph_type = cfs_trace_buf_idx_get(); + /* y2038 safe since all user space treats this as unsigned, but + * will overflow in 2106 + */ + header->ph_sec = (u32)ts.tv_sec; + header->ph_usec = ts.tv_nsec / NSEC_PER_USEC; + header->ph_stack = stack; + header->ph_pid = current->pid; + header->ph_line_num = msgdata->msg_line; + header->ph_extern_pid = 0; +} + +static char * +dbghdr_to_err_string(struct ptldebug_header *hdr) +{ + switch (hdr->ph_subsys) { + case S_LND: + case S_LNET: + return "LNetError"; + default: + return "LustreError"; + } +} + +static char * +dbghdr_to_info_string(struct ptldebug_header *hdr) +{ + switch (hdr->ph_subsys) { + case S_LND: + case S_LNET: + return "LNet"; + default: + return "Lustre"; + } +} + +void cfs_print_to_console(struct ptldebug_header *hdr, int mask, + const char *buf, int len, const char *file, + const char *fn) +{ + char *prefix = "Lustre", *ptype = NULL; + + if ((mask & D_EMERG) != 0) { + prefix = dbghdr_to_err_string(hdr); + ptype = KERN_EMERG; + } else if ((mask & D_ERROR) != 0) { + prefix = dbghdr_to_err_string(hdr); + ptype = KERN_ERR; + } else if ((mask & D_WARNING) != 0) { + prefix = dbghdr_to_info_string(hdr); + ptype = KERN_WARNING; + } else if ((mask & (D_CONSOLE | libcfs_printk)) != 0) { + prefix = dbghdr_to_info_string(hdr); + ptype = KERN_INFO; + } + + if ((mask & D_CONSOLE) != 0) { + printk("%s%s: %.*s", ptype, prefix, len, buf); + } else { + printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, + hdr->ph_pid, hdr->ph_extern_pid, file, hdr->ph_line_num, + fn, len, buf); + } +} + +int cfs_trace_max_debug_mb(void) +{ + int total_mb = (totalram_pages >> (20 - PAGE_SHIFT)); + + return max(512, (total_mb * 80) / 100); +} diff --git a/drivers/staging/lustre/lnet/libcfs/module.c b/drivers/staging/lustre/lnet/libcfs/module.c new 
file mode 100644 index 000000000..cdc640bfd --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/module.c @@ -0,0 +1,674 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, 2015 Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/unistd.h> +#include <net/sock.h> +#include <linux/uio.h> + +#include <linux/uaccess.h> + +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/list.h> + +#include <linux/sysctl.h> +#include <linux/debugfs.h> + +# define DEBUG_SUBSYSTEM S_LNET + +#define LNET_MAX_IOCTL_BUF_LEN (sizeof(struct lnet_ioctl_net_config) + \ + sizeof(struct lnet_ioctl_config_data)) + +#include "../../include/linux/libcfs/libcfs.h" +#include <asm/div64.h> + +#include "../../include/linux/libcfs/libcfs_crypto.h" +#include "../../include/linux/lnet/lib-lnet.h" +#include "../../include/linux/lnet/lib-dlc.h" +#include "../../include/linux/lnet/lnet.h" +#include "tracefile.h" + +static struct dentry *lnet_debugfs_root; + +/* called when opening /dev/device */ +static int libcfs_psdev_open(unsigned long flags, void *args) +{ + try_module_get(THIS_MODULE); + return 0; +} + +/* called when closing /dev/device */ +static int libcfs_psdev_release(unsigned long flags, void *args) +{ + module_put(THIS_MODULE); + return 0; +} + +static DECLARE_RWSEM(ioctl_list_sem); +static LIST_HEAD(ioctl_list); + +int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand) +{ + int rc = 0; + + down_write(&ioctl_list_sem); + if (!list_empty(&hand->item)) + rc = -EBUSY; + else + list_add_tail(&hand->item, &ioctl_list); + up_write(&ioctl_list_sem); + + return rc; +} +EXPORT_SYMBOL(libcfs_register_ioctl); + +int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand) +{ + int rc = 0; + + down_write(&ioctl_list_sem); + if (list_empty(&hand->item)) + rc = -ENOENT; + else + list_del_init(&hand->item); + up_write(&ioctl_list_sem); + + return rc; +} +EXPORT_SYMBOL(libcfs_deregister_ioctl); + +static int libcfs_ioctl_handle(struct cfs_psdev_file *pfile, unsigned long cmd, + void __user *arg, struct libcfs_ioctl_hdr *hdr) +{ + struct libcfs_ioctl_data *data = NULL; + int err = -EINVAL; + + /* + * The libcfs_ioctl_data_adjust() function performs 
adjustment + * operations on the libcfs_ioctl_data structure to make + * it usable by the code. This doesn't need to be called + * for new data structures added. + */ + if (hdr->ioc_version == LIBCFS_IOCTL_VERSION) { + data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr); + err = libcfs_ioctl_data_adjust(data); + if (err) + return err; + } + + switch (cmd) { + case IOC_LIBCFS_CLEAR_DEBUG: + libcfs_debug_clear_buffer(); + return 0; + /* + * case IOC_LIBCFS_PANIC: + * Handled in arch/cfs_module.c + */ + case IOC_LIBCFS_MARK_DEBUG: + if (!data->ioc_inlbuf1 || + data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0') + return -EINVAL; + libcfs_debug_mark_buffer(data->ioc_inlbuf1); + return 0; + + default: { + struct libcfs_ioctl_handler *hand; + + err = -EINVAL; + down_read(&ioctl_list_sem); + list_for_each_entry(hand, &ioctl_list, item) { + err = hand->handle_ioctl(cmd, hdr); + if (err != -EINVAL) { + if (err == 0) + err = libcfs_ioctl_popdata(arg, + hdr, hdr->ioc_len); + break; + } + } + up_read(&ioctl_list_sem); + break; + } + } + + return err; +} + +static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd, + void __user *arg) +{ + struct libcfs_ioctl_hdr *hdr; + int err = 0; + __u32 buf_len; + + err = libcfs_ioctl_getdata_len(arg, &buf_len); + if (err) + return err; + + /* + * do a check here to restrict the size of the memory + * to allocate to guard against DoS attacks. + */ + if (buf_len > LNET_MAX_IOCTL_BUF_LEN) { + CERROR("LNET: user buffer exceeds kernel buffer\n"); + return -EINVAL; + } + + LIBCFS_ALLOC_GFP(hdr, buf_len, GFP_KERNEL); + if (!hdr) + return -ENOMEM; + + /* 'cmd' and permissions get checked in our arch-specific caller */ + if (copy_from_user(hdr, arg, buf_len)) { + CERROR("LNET ioctl: data error\n"); + err = -EFAULT; + goto out; + } + + err = libcfs_ioctl_handle(pfile, cmd, arg, hdr); + +out: + LIBCFS_FREE(hdr, buf_len); + return err; +} + +struct cfs_psdev_ops libcfs_psdev_ops = { + libcfs_psdev_open, + libcfs_psdev_release, + NULL, + NULL, + libcfs_ioctl +}; + +int lprocfs_call_handler(void *data, int write, loff_t *ppos, + void __user *buffer, size_t *lenp, + int (*handler)(void *data, int write, loff_t pos, + void __user *buffer, int len)) +{ + int rc = handler(data, write, *ppos, buffer, *lenp); + + if (rc < 0) + return rc; + + if (write) { + *ppos += *lenp; + } else { + *lenp = rc; + *ppos += rc; + } + return 0; +} +EXPORT_SYMBOL(lprocfs_call_handler); + +static int __proc_dobitmasks(void *data, int write, + loff_t pos, void __user *buffer, int nob) +{ + const int tmpstrlen = 512; + char *tmpstr; + int rc; + unsigned int *mask = data; + int is_subsys = (mask == &libcfs_subsystem_debug) ? 1 : 0; + int is_printk = (mask == &libcfs_printk) ? 
1 : 0; + + rc = cfs_trace_allocate_string_buffer(&tmpstr, tmpstrlen); + if (rc < 0) + return rc; + + if (!write) { + libcfs_debug_mask2str(tmpstr, tmpstrlen, *mask, is_subsys); + rc = strlen(tmpstr); + + if (pos >= rc) { + rc = 0; + } else { + rc = cfs_trace_copyout_string(buffer, nob, + tmpstr + pos, "\n"); + } + } else { + rc = cfs_trace_copyin_string(tmpstr, tmpstrlen, buffer, nob); + if (rc < 0) { + kfree(tmpstr); + return rc; + } + + rc = libcfs_debug_str2mask(mask, tmpstr, is_subsys); + /* Always print LBUG/LASSERT to console, so keep this mask */ + if (is_printk) + *mask |= D_EMERG; + } + + kfree(tmpstr); + return rc; +} + +static int proc_dobitmasks(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, + __proc_dobitmasks); +} + +static int __proc_dump_kernel(void *data, int write, + loff_t pos, void __user *buffer, int nob) +{ + if (!write) + return 0; + + return cfs_trace_dump_debug_buffer_usrstr(buffer, nob); +} + +static int proc_dump_kernel(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, + __proc_dump_kernel); +} + +static int __proc_daemon_file(void *data, int write, + loff_t pos, void __user *buffer, int nob) +{ + if (!write) { + int len = strlen(cfs_tracefile); + + if (pos >= len) + return 0; + + return cfs_trace_copyout_string(buffer, nob, + cfs_tracefile + pos, "\n"); + } + + return cfs_trace_daemon_command_usrstr(buffer, nob); +} + +static int proc_daemon_file(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, + __proc_daemon_file); +} + +static int libcfs_force_lbug(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + if (write) + LBUG(); + return 0; +} + +static int proc_fail_loc(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int rc; + long old_fail_loc = cfs_fail_loc; + + rc = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); + if (old_fail_loc != cfs_fail_loc) + wake_up(&cfs_race_waitq); + return rc; +} + +static int __proc_cpt_table(void *data, int write, + loff_t pos, void __user *buffer, int nob) +{ + char *buf = NULL; + int len = 4096; + int rc = 0; + + if (write) + return -EPERM; + + LASSERT(cfs_cpt_table); + + while (1) { + LIBCFS_ALLOC(buf, len); + if (!buf) + return -ENOMEM; + + rc = cfs_cpt_table_print(cfs_cpt_table, buf, len); + if (rc >= 0) + break; + + if (rc == -EFBIG) { + LIBCFS_FREE(buf, len); + len <<= 1; + continue; + } + goto out; + } + + if (pos >= rc) { + rc = 0; + goto out; + } + + rc = cfs_trace_copyout_string(buffer, nob, buf + pos, NULL); + out: + if (buf) + LIBCFS_FREE(buf, len); + return rc; +} + +static int proc_cpt_table(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, + __proc_cpt_table); +} + +static struct ctl_table lnet_table[] = { + { + .procname = "debug", + .data = &libcfs_debug, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dobitmasks, + }, + { + .procname = "subsystem_debug", + .data = &libcfs_subsystem_debug, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dobitmasks, + }, + { + .procname = "printk", + .data = &libcfs_printk, + .maxlen = sizeof(int), + .mode = 0644, + 
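	/* NB: despite the ctl_table type, the entries in this table are not
	 * registered with sysctl; lustre_insert_debugfs() below exposes them
	 * under /sys/kernel/debug/lnet/ through the debugfs wrappers */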
.proc_handler = &proc_dobitmasks,
+	},
+	{
+		.procname = "cpu_partition_table",
+		.maxlen	  = 128,
+		.mode	  = 0444,
+		.proc_handler = &proc_cpt_table,
+	},
+
+	{
+		.procname = "upcall",
+		.data	  = lnet_upcall,
+		.maxlen	  = sizeof(lnet_upcall),
+		.mode	  = 0644,
+		.proc_handler = &proc_dostring,
+	},
+	{
+		.procname = "debug_log_upcall",
+		.data	  = lnet_debug_log_upcall,
+		.maxlen	  = sizeof(lnet_debug_log_upcall),
+		.mode	  = 0644,
+		.proc_handler = &proc_dostring,
+	},
+	{
+		.procname = "catastrophe",
+		.data	  = &libcfs_catastrophe,
+		.maxlen	  = sizeof(int),
+		.mode	  = 0444,
+		.proc_handler = &proc_dointvec,
+	},
+	{
+		.procname = "dump_kernel",
+		.maxlen	  = 256,
+		.mode	  = 0200,
+		.proc_handler = &proc_dump_kernel,
+	},
+	{
+		.procname = "daemon_file",
+		.mode	  = 0644,
+		.maxlen	  = 256,
+		.proc_handler = &proc_daemon_file,
+	},
+	{
+		.procname = "force_lbug",
+		.data	  = NULL,
+		.maxlen	  = 0,
+		.mode	  = 0200,
+		.proc_handler = &libcfs_force_lbug
+	},
+	{
+		.procname = "fail_loc",
+		.data	  = &cfs_fail_loc,
+		.maxlen	  = sizeof(cfs_fail_loc),
+		.mode	  = 0644,
+		.proc_handler = &proc_fail_loc
+	},
+	{
+		.procname = "fail_val",
+		.data	  = &cfs_fail_val,
+		.maxlen	  = sizeof(int),
+		.mode	  = 0644,
+		.proc_handler = &proc_dointvec
+	},
+	{
+	}
+};
+
+static const struct lnet_debugfs_symlink_def lnet_debugfs_symlinks[] = {
+	{ "console_ratelimit",
+	  "/sys/module/libcfs/parameters/libcfs_console_ratelimit"},
+	{ "debug_path",
+	  "/sys/module/libcfs/parameters/libcfs_debug_file_path"},
+	{ "panic_on_lbug",
+	  "/sys/module/libcfs/parameters/libcfs_panic_on_lbug"},
+	{ "libcfs_console_backoff",
+	  "/sys/module/libcfs/parameters/libcfs_console_backoff"},
+	{ "debug_mb",
+	  "/sys/module/libcfs/parameters/libcfs_debug_mb"},
+	{ "console_min_delay_centisecs",
+	  "/sys/module/libcfs/parameters/libcfs_console_min_delay"},
+	{ "console_max_delay_centisecs",
+	  "/sys/module/libcfs/parameters/libcfs_console_max_delay"},
+	{},
+};
+
+static ssize_t lnet_debugfs_read(struct file *filp, char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	struct ctl_table *table = filp->private_data;
+	int error;
+
+	error = table->proc_handler(table, 0, (void __user *)buf, &count, ppos);
+	if (!error)
+		error = count;
+
+	return error;
+}
+
+static ssize_t lnet_debugfs_write(struct file *filp, const char __user *buf,
+				  size_t count, loff_t *ppos)
+{
+	struct ctl_table *table = filp->private_data;
+	int error;
+
+	error = table->proc_handler(table, 1, (void __user *)buf, &count, ppos);
+	if (!error)
+		error = count;
+
+	return error;
+}
+
+static const struct file_operations lnet_debugfs_file_operations_rw = {
+	.open = simple_open,
+	.read = lnet_debugfs_read,
+	.write = lnet_debugfs_write,
+	.llseek = default_llseek,
+};
+
+static const struct file_operations lnet_debugfs_file_operations_ro = {
+	.open = simple_open,
+	.read = lnet_debugfs_read,
+	.llseek = default_llseek,
+};
+
+static const struct file_operations lnet_debugfs_file_operations_wo = {
+	.open = simple_open,
+	.write = lnet_debugfs_write,
+	.llseek = default_llseek,
+};
+
+static const struct file_operations *lnet_debugfs_fops_select(umode_t mode)
+{
+	if (!(mode & S_IWUGO))
+		return &lnet_debugfs_file_operations_ro;
+
+	if (!(mode & S_IRUGO))
+		return &lnet_debugfs_file_operations_wo;
+
+	return &lnet_debugfs_file_operations_rw;
+}
+
+void lustre_insert_debugfs(struct ctl_table *table,
+			   const struct lnet_debugfs_symlink_def *symlinks)
+{
+	if (!lnet_debugfs_root)
+		lnet_debugfs_root = debugfs_create_dir("lnet", NULL);
+
+	/* Even if we cannot create it, just ignore the failure altogether */
+	if 
(IS_ERR_OR_NULL(lnet_debugfs_root)) + return; + + /* We don't save the dentry returned in next two calls, because + * we don't call debugfs_remove() but rather remove_recursive() + */ + for (; table->procname; table++) + debugfs_create_file(table->procname, table->mode, + lnet_debugfs_root, table, + lnet_debugfs_fops_select(table->mode)); + + for (; symlinks && symlinks->name; symlinks++) + debugfs_create_symlink(symlinks->name, lnet_debugfs_root, + symlinks->target); +} +EXPORT_SYMBOL_GPL(lustre_insert_debugfs); + +static void lustre_remove_debugfs(void) +{ + debugfs_remove_recursive(lnet_debugfs_root); + + lnet_debugfs_root = NULL; +} + +static int libcfs_init(void) +{ + int rc; + + rc = libcfs_debug_init(5 * 1024 * 1024); + if (rc < 0) { + pr_err("LustreError: libcfs_debug_init: %d\n", rc); + return rc; + } + + rc = cfs_cpu_init(); + if (rc != 0) + goto cleanup_debug; + + rc = misc_register(&libcfs_dev); + if (rc) { + CERROR("misc_register: error %d\n", rc); + goto cleanup_cpu; + } + + rc = cfs_wi_startup(); + if (rc) { + CERROR("initialize workitem: error %d\n", rc); + goto cleanup_deregister; + } + + /* max to 4 threads, should be enough for rehash */ + rc = min(cfs_cpt_weight(cfs_cpt_table, CFS_CPT_ANY), 4); + rc = cfs_wi_sched_create("cfs_rh", cfs_cpt_table, CFS_CPT_ANY, + rc, &cfs_sched_rehash); + if (rc != 0) { + CERROR("Startup workitem scheduler: error: %d\n", rc); + goto cleanup_deregister; + } + + rc = cfs_crypto_register(); + if (rc) { + CERROR("cfs_crypto_register: error %d\n", rc); + goto cleanup_wi; + } + + lustre_insert_debugfs(lnet_table, lnet_debugfs_symlinks); + + CDEBUG(D_OTHER, "portals setup OK\n"); + return 0; + cleanup_wi: + cfs_wi_shutdown(); + cleanup_deregister: + misc_deregister(&libcfs_dev); +cleanup_cpu: + cfs_cpu_fini(); + cleanup_debug: + libcfs_debug_cleanup(); + return rc; +} + +static void libcfs_exit(void) +{ + int rc; + + lustre_remove_debugfs(); + + if (cfs_sched_rehash) { + cfs_wi_sched_destroy(cfs_sched_rehash); + cfs_sched_rehash = NULL; + } + + cfs_crypto_unregister(); + cfs_wi_shutdown(); + + misc_deregister(&libcfs_dev); + + cfs_cpu_fini(); + + rc = libcfs_debug_cleanup(); + if (rc) + pr_err("LustreError: libcfs_debug_cleanup: %d\n", rc); +} + +MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>"); +MODULE_DESCRIPTION("Lustre helper library"); +MODULE_VERSION(LIBCFS_VERSION); +MODULE_LICENSE("GPL"); + +module_init(libcfs_init); +module_exit(libcfs_exit); diff --git a/drivers/staging/lustre/lnet/libcfs/prng.c b/drivers/staging/lustre/lnet/libcfs/prng.c new file mode 100644 index 000000000..c75ae9a68 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/prng.c @@ -0,0 +1,140 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/libcfs/prng.c
+ *
+ * concatenation of the following two 16-bit multiply-with-carry generators,
+ * x(n)=a*x(n-1)+carry mod 2^16 and y(n)=b*y(n-1)+carry mod 2^16,
+ * number and carry packed within the same 32-bit integer.
+ * algorithm recommended by Marsaglia
+ */
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+/*
+ * From: George Marsaglia <geo@stat.fsu.edu>
+ * Newsgroups: sci.math
+ * Subject: Re: A RANDOM NUMBER GENERATOR FOR C
+ * Date: Tue, 30 Sep 1997 05:29:35 -0700
+ *
+ * You may replace the two constants 36969 and 18000 by any
+ * pair of distinct constants from this list:
+ * 18000 18030 18273 18513 18879 19074 19098 19164 19215 19584
+ * 19599 19950 20088 20508 20544 20664 20814 20970 21153 21243
+ * 21423 21723 21954 22125 22188 22293 22860 22938 22965 22974
+ * 23109 23124 23163 23208 23508 23520 23553 23658 23865 24114
+ * 24219 24660 24699 24864 24948 25023 25308 25443 26004 26088
+ * 26154 26550 26679 26838 27183 27258 27753 27795 27810 27834
+ * 27960 28320 28380 28689 28710 28794 28854 28959 28980 29013
+ * 29379 29889 30135 30345 30459 30714 30903 30963 31059 31083
+ * (or any other 16-bit constants k for which both k*2^16-1
+ * and k*2^15-1 are prime)
+ */
+
+#define RANDOM_CONST_A 18030
+#define RANDOM_CONST_B 29013
+
+static unsigned int seed_x = 521288629;
+static unsigned int seed_y = 362436069;
+
+/**
+ * cfs_rand - creates new seeds
+ *
+ * First it creates new seeds from the previous seeds. Then it generates a
+ * new pseudo-random number for use.
+ *
+ * Returns a pseudo-random 32-bit integer
+ */
+unsigned int cfs_rand(void)
+{
+	seed_x = RANDOM_CONST_A * (seed_x & 65535) + (seed_x >> 16);
+	seed_y = RANDOM_CONST_B * (seed_y & 65535) + (seed_y >> 16);
+
+	return ((seed_x << 16) + (seed_y & 65535));
+}
+EXPORT_SYMBOL(cfs_rand);
+
+/**
+ * cfs_srand - sets the initial seed
+ * @seed1 : (seed_x) should have the most entropy in the low bits of the word
+ * @seed2 : (seed_y) should have the most entropy in the high bits of the word
+ *
+ * Replaces the original seeds with new values. Used to generate new
+ * pseudo-random numbers.
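+ *
+ * Purely illustrative (hypothetical) usage, mixing in two independent
+ * entropy sources at init time:
+ *
+ *	cfs_srand(jiffies, (unsigned int)(unsigned long)current);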
+ */ +void cfs_srand(unsigned int seed1, unsigned int seed2) +{ + if (seed1) + seed_x = seed1; /* use default seeds if parameter is 0 */ + if (seed2) + seed_y = seed2; +} +EXPORT_SYMBOL(cfs_srand); + +/** + * cfs_get_random_bytes - generate a bunch of random numbers + * @buf : buffer to fill with random numbers + * @size: size of passed in buffer + * + * Fills a buffer with random bytes + */ +void cfs_get_random_bytes(void *buf, int size) +{ + int *p = buf; + int rem, tmp; + + LASSERT(size >= 0); + + rem = min((int)((unsigned long)buf & (sizeof(int) - 1)), size); + if (rem) { + get_random_bytes(&tmp, sizeof(tmp)); + tmp ^= cfs_rand(); + memcpy(buf, &tmp, rem); + p = buf + rem; + size -= rem; + } + + while (size >= sizeof(int)) { + get_random_bytes(&tmp, sizeof(tmp)); + *p = cfs_rand() ^ tmp; + size -= sizeof(int); + p++; + } + buf = p; + if (size) { + get_random_bytes(&tmp, sizeof(tmp)); + tmp ^= cfs_rand(); + memcpy(buf, &tmp, size); + } +} +EXPORT_SYMBOL(cfs_get_random_bytes); diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.c b/drivers/staging/lustre/lnet/libcfs/tracefile.c new file mode 100644 index 000000000..244eb89ee --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/tracefile.c @@ -0,0 +1,1208 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/libcfs/tracefile.c + * + * Author: Zach Brown <zab@clusterfs.com> + * Author: Phil Schwan <phil@clusterfs.com> + */ + +#define DEBUG_SUBSYSTEM S_LNET +#define LUSTRE_TRACEFILE_PRIVATE +#include "tracefile.h" + +#include "../../include/linux/libcfs/libcfs.h" + +/* XXX move things up to the top, comment */ +union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned; + +char cfs_tracefile[TRACEFILE_NAME_SIZE]; +long long cfs_tracefile_size = CFS_TRACEFILE_SIZE; +static struct tracefiled_ctl trace_tctl; +static DEFINE_MUTEX(cfs_trace_thread_mutex); +static int thread_running; + +static atomic_t cfs_tage_allocated = ATOMIC_INIT(0); + +struct page_collection { + struct list_head pc_pages; + /* + * if this flag is set, collect_pages() will spill both + * ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise, + * only ->tcd_pages are spilled. 
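+	 * The dump paths (cfs_trace_debug_print(), cfs_trace_flush_pages()
+	 * and cfs_tracefile_dump_all_pages()) all set this flag so that
+	 * the pages preserved on the daemon lists are included as well.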
+ */ + int pc_want_daemon_pages; +}; + +struct tracefiled_ctl { + struct completion tctl_start; + struct completion tctl_stop; + wait_queue_head_t tctl_waitq; + pid_t tctl_pid; + atomic_t tctl_shutdown; +}; + +/* + * small data-structure for each page owned by tracefiled. + */ +struct cfs_trace_page { + /* + * page itself + */ + struct page *page; + /* + * linkage into one of the lists in trace_data_union or + * page_collection + */ + struct list_head linkage; + /* + * number of bytes used within this page + */ + unsigned int used; + /* + * cpu that owns this page + */ + unsigned short cpu; + /* + * type(context) of this page + */ + unsigned short type; +}; + +static void put_pages_on_tcd_daemon_list(struct page_collection *pc, + struct cfs_trace_cpu_data *tcd); + +static inline struct cfs_trace_page * +cfs_tage_from_list(struct list_head *list) +{ + return list_entry(list, struct cfs_trace_page, linkage); +} + +static struct cfs_trace_page *cfs_tage_alloc(gfp_t gfp) +{ + struct page *page; + struct cfs_trace_page *tage; + + /* My caller is trying to free memory */ + if (!in_interrupt() && memory_pressure_get()) + return NULL; + + /* + * Don't spam console with allocation failures: they will be reported + * by upper layer anyway. + */ + gfp |= __GFP_NOWARN; + page = alloc_page(gfp); + if (!page) + return NULL; + + tage = kmalloc(sizeof(*tage), gfp); + if (!tage) { + __free_page(page); + return NULL; + } + + tage->page = page; + atomic_inc(&cfs_tage_allocated); + return tage; +} + +static void cfs_tage_free(struct cfs_trace_page *tage) +{ + __free_page(tage->page); + kfree(tage); + atomic_dec(&cfs_tage_allocated); +} + +static void cfs_tage_to_tail(struct cfs_trace_page *tage, + struct list_head *queue) +{ + list_move_tail(&tage->linkage, queue); +} + +int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp, + struct list_head *stock) +{ + int i; + + /* + * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. + */ + + for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++i) { + struct cfs_trace_page *tage; + + tage = cfs_tage_alloc(gfp); + if (!tage) + break; + list_add_tail(&tage->linkage, stock); + } + return i; +} + +/* return a page that has 'len' bytes left at the end */ +static struct cfs_trace_page * +cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len) +{ + struct cfs_trace_page *tage; + + if (tcd->tcd_cur_pages > 0) { + __LASSERT(!list_empty(&tcd->tcd_pages)); + tage = cfs_tage_from_list(tcd->tcd_pages.prev); + if (tage->used + len <= PAGE_SIZE) + return tage; + } + + if (tcd->tcd_cur_pages < tcd->tcd_max_pages) { + if (tcd->tcd_cur_stock_pages > 0) { + tage = cfs_tage_from_list(tcd->tcd_stock_pages.prev); + --tcd->tcd_cur_stock_pages; + list_del_init(&tage->linkage); + } else { + tage = cfs_tage_alloc(GFP_ATOMIC); + if (unlikely(!tage)) { + if ((!memory_pressure_get() || + in_interrupt()) && printk_ratelimit()) + printk(KERN_WARNING + "cannot allocate a tage (%ld)\n", + tcd->tcd_cur_pages); + return NULL; + } + } + + tage->used = 0; + tage->cpu = smp_processor_id(); + tage->type = tcd->tcd_type; + list_add_tail(&tage->linkage, &tcd->tcd_pages); + tcd->tcd_cur_pages++; + + if (tcd->tcd_cur_pages > 8 && thread_running) { + struct tracefiled_ctl *tctl = &trace_tctl; + /* + * wake up tracefiled to process some pages. 
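+			 * (this happens only once more than 8 pages have
+			 * accumulated; see the tcd_cur_pages check above)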
+			 */
+			wake_up(&tctl->tctl_waitq);
+		}
+		return tage;
+	}
+	return NULL;
+}
+
+static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd)
+{
+	int pgcount = tcd->tcd_cur_pages / 10;
+	struct page_collection pc;
+	struct cfs_trace_page *tage;
+	struct cfs_trace_page *tmp;
+
+	/*
+	 * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+	 * from here: this will lead to infinite recursion.
+	 */
+
+	if (printk_ratelimit())
+		printk(KERN_WARNING "debug daemon buffer overflowed; discarding 10%% of pages (%d of %ld)\n",
+		       pgcount + 1, tcd->tcd_cur_pages);
+
+	INIT_LIST_HEAD(&pc.pc_pages);
+
+	list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
+		if (pgcount-- == 0)
+			break;
+
+		list_move_tail(&tage->linkage, &pc.pc_pages);
+		tcd->tcd_cur_pages--;
+	}
+	put_pages_on_tcd_daemon_list(&pc, tcd);
+}
+
+/* return a page that has 'len' bytes left at the end */
+static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd,
+						 unsigned long len)
+{
+	struct cfs_trace_page *tage;
+
+	/*
+	 * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+	 * from here: this will lead to infinite recursion.
+	 */
+
+	if (len > PAGE_SIZE) {
+		pr_err("cowardly refusing to write %lu bytes in a page\n", len);
+		return NULL;
+	}
+
+	tage = cfs_trace_get_tage_try(tcd, len);
+	if (tage)
+		return tage;
+	if (thread_running)
+		cfs_tcd_shrink(tcd);
+	if (tcd->tcd_cur_pages > 0) {
+		tage = cfs_tage_from_list(tcd->tcd_pages.next);
+		tage->used = 0;
+		cfs_tage_to_tail(tage, &tcd->tcd_pages);
+	}
+	return tage;
+}
+
+int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
+		     const char *format, ...)
+{
+	va_list args;
+	int rc;
+
+	va_start(args, format);
+	rc = libcfs_debug_vmsg2(msgdata, format, args, NULL);
+	va_end(args);
+
+	return rc;
+}
+EXPORT_SYMBOL(libcfs_debug_msg);
+
+int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
+		       const char *format1, va_list args,
+		       const char *format2, ...)
+{
+	struct cfs_trace_cpu_data *tcd = NULL;
+	struct ptldebug_header header = {0};
+	struct cfs_trace_page *tage;
+	/* string_buf is used only if tcd != NULL, and is always set then */
+	char *string_buf = NULL;
+	char *debug_buf;
+	int known_size;
+	int needed = 85; /* average message length */
+	int max_nob;
+	va_list ap;
+	int depth;
+	int i;
+	int remain;
+	int mask = msgdata->msg_mask;
+	const char *file = kbasename(msgdata->msg_file);
+	struct cfs_debug_limit_state *cdls = msgdata->msg_cdls;
+
+	tcd = cfs_trace_get_tcd();
+
+	/* cfs_trace_get_tcd() grabs a lock, which disables preemption and
+	 * pins us to a particular CPU. This avoids an smp_processor_id()
+	 * warning on Linux when debugging is enabled.
+	 */
+	cfs_set_ptldebug_header(&header, msgdata, CDEBUG_STACK());
+
+	if (!tcd)		/* arch may not log in IRQ context */
+		goto console;
+
+	if (tcd->tcd_cur_pages == 0)
+		header.ph_flags |= PH_FLAG_FIRST_RECORD;
+
+	if (tcd->tcd_shutting_down) {
+		cfs_trace_put_tcd(tcd);
+		tcd = NULL;
+		goto console;
+	}
+
+	depth = __current_nesting_level();
+	known_size = strlen(file) + 1 + depth;
+	if (msgdata->msg_fn)
+		known_size += strlen(msgdata->msg_fn) + 1;
+
+	if (libcfs_debug_binary)
+		known_size += sizeof(header);
+
+	/*
+	 * '2' is used because vsnprintf() returns the real size required for
+	 * the output _without_ the terminating NUL, so the loop below retries
+	 * once if 'needed' was too small for this format.
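+	 * e.g. if the current page has only 100 bytes free and the message
+	 * needs 300, the first vsnprintf() reports 300 >= max_nob, and the
+	 * second pass requests a page with room for the full size.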
+ */ + for (i = 0; i < 2; i++) { + tage = cfs_trace_get_tage(tcd, needed + known_size + 1); + if (!tage) { + if (needed + known_size > PAGE_SIZE) + mask |= D_ERROR; + + cfs_trace_put_tcd(tcd); + tcd = NULL; + goto console; + } + + string_buf = (char *)page_address(tage->page) + + tage->used + known_size; + + max_nob = PAGE_SIZE - tage->used - known_size; + if (max_nob <= 0) { + printk(KERN_EMERG "negative max_nob: %d\n", + max_nob); + mask |= D_ERROR; + cfs_trace_put_tcd(tcd); + tcd = NULL; + goto console; + } + + needed = 0; + if (format1) { + va_copy(ap, args); + needed = vsnprintf(string_buf, max_nob, format1, ap); + va_end(ap); + } + + if (format2) { + remain = max_nob - needed; + if (remain < 0) + remain = 0; + + va_start(ap, format2); + needed += vsnprintf(string_buf + needed, remain, + format2, ap); + va_end(ap); + } + + if (needed < max_nob) /* well. printing ok.. */ + break; + } + + if (*(string_buf + needed - 1) != '\n') + printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n", + file, msgdata->msg_line, msgdata->msg_fn); + + header.ph_len = known_size + needed; + debug_buf = (char *)page_address(tage->page) + tage->used; + + if (libcfs_debug_binary) { + memcpy(debug_buf, &header, sizeof(header)); + tage->used += sizeof(header); + debug_buf += sizeof(header); + } + + /* indent message according to the nesting level */ + while (depth-- > 0) { + *(debug_buf++) = '.'; + ++tage->used; + } + + strcpy(debug_buf, file); + tage->used += strlen(file) + 1; + debug_buf += strlen(file) + 1; + + if (msgdata->msg_fn) { + strcpy(debug_buf, msgdata->msg_fn); + tage->used += strlen(msgdata->msg_fn) + 1; + debug_buf += strlen(msgdata->msg_fn) + 1; + } + + __LASSERT(debug_buf == string_buf); + + tage->used += needed; + __LASSERT(tage->used <= PAGE_SIZE); + +console: + if ((mask & libcfs_printk) == 0) { + /* no console output requested */ + if (tcd) + cfs_trace_put_tcd(tcd); + return 1; + } + + if (cdls) { + if (libcfs_console_ratelimit && + cdls->cdls_next != 0 && /* not first time ever */ + !cfs_time_after(cfs_time_current(), cdls->cdls_next)) { + /* skipping a console message */ + cdls->cdls_count++; + if (tcd) + cfs_trace_put_tcd(tcd); + return 1; + } + + if (cfs_time_after(cfs_time_current(), + cdls->cdls_next + libcfs_console_max_delay + + cfs_time_seconds(10))) { + /* last timeout was a long time ago */ + cdls->cdls_delay /= libcfs_console_backoff * 4; + } else { + cdls->cdls_delay *= libcfs_console_backoff; + } + + if (cdls->cdls_delay < libcfs_console_min_delay) + cdls->cdls_delay = libcfs_console_min_delay; + else if (cdls->cdls_delay > libcfs_console_max_delay) + cdls->cdls_delay = libcfs_console_max_delay; + + /* ensure cdls_next is never zero after it's been seen */ + cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1; + } + + if (tcd) { + cfs_print_to_console(&header, mask, string_buf, needed, file, + msgdata->msg_fn); + cfs_trace_put_tcd(tcd); + } else { + string_buf = cfs_trace_get_console_buffer(); + + needed = 0; + if (format1) { + va_copy(ap, args); + needed = vsnprintf(string_buf, + CFS_TRACE_CONSOLE_BUFFER_SIZE, + format1, ap); + va_end(ap); + } + if (format2) { + remain = CFS_TRACE_CONSOLE_BUFFER_SIZE - needed; + if (remain > 0) { + va_start(ap, format2); + needed += vsnprintf(string_buf + needed, remain, + format2, ap); + va_end(ap); + } + } + cfs_print_to_console(&header, mask, + string_buf, needed, file, msgdata->msg_fn); + + put_cpu(); + } + + if (cdls && cdls->cdls_count != 0) { + string_buf = cfs_trace_get_console_buffer(); + + needed = 
snprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE, + "Skipped %d previous similar message%s\n", + cdls->cdls_count, + (cdls->cdls_count > 1) ? "s" : ""); + + cfs_print_to_console(&header, mask, + string_buf, needed, file, msgdata->msg_fn); + + put_cpu(); + cdls->cdls_count = 0; + } + + return 0; +} +EXPORT_SYMBOL(libcfs_debug_vmsg2); + +void +cfs_trace_assertion_failed(const char *str, + struct libcfs_debug_msg_data *msgdata) +{ + struct ptldebug_header hdr; + + libcfs_panic_in_progress = 1; + libcfs_catastrophe = 1; + mb(); + + cfs_set_ptldebug_header(&hdr, msgdata, CDEBUG_STACK()); + + cfs_print_to_console(&hdr, D_EMERG, str, strlen(str), + msgdata->msg_file, msgdata->msg_fn); + + panic("Lustre debug assertion failure\n"); + + /* not reached */ +} + +static void +panic_collect_pages(struct page_collection *pc) +{ + /* Do the collect_pages job on a single CPU: assumes that all other + * CPUs have been stopped during a panic. If this isn't true for some + * arch, this will have to be implemented separately in each arch. + */ + int i; + int j; + struct cfs_trace_cpu_data *tcd; + + INIT_LIST_HEAD(&pc->pc_pages); + + cfs_tcd_for_each(tcd, i, j) { + list_splice_init(&tcd->tcd_pages, &pc->pc_pages); + tcd->tcd_cur_pages = 0; + + if (pc->pc_want_daemon_pages) { + list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages); + tcd->tcd_cur_daemon_pages = 0; + } + } +} + +static void collect_pages_on_all_cpus(struct page_collection *pc) +{ + struct cfs_trace_cpu_data *tcd; + int i, cpu; + + for_each_possible_cpu(cpu) { + cfs_tcd_for_each_type_lock(tcd, i, cpu) { + list_splice_init(&tcd->tcd_pages, &pc->pc_pages); + tcd->tcd_cur_pages = 0; + if (pc->pc_want_daemon_pages) { + list_splice_init(&tcd->tcd_daemon_pages, + &pc->pc_pages); + tcd->tcd_cur_daemon_pages = 0; + } + } + } +} + +static void collect_pages(struct page_collection *pc) +{ + INIT_LIST_HEAD(&pc->pc_pages); + + if (libcfs_panic_in_progress) + panic_collect_pages(pc); + else + collect_pages_on_all_cpus(pc); +} + +static void put_pages_back_on_all_cpus(struct page_collection *pc) +{ + struct cfs_trace_cpu_data *tcd; + struct list_head *cur_head; + struct cfs_trace_page *tage; + struct cfs_trace_page *tmp; + int i, cpu; + + for_each_possible_cpu(cpu) { + cfs_tcd_for_each_type_lock(tcd, i, cpu) { + cur_head = tcd->tcd_pages.next; + + list_for_each_entry_safe(tage, tmp, &pc->pc_pages, + linkage) { + __LASSERT_TAGE_INVARIANT(tage); + + if (tage->cpu != cpu || tage->type != i) + continue; + + cfs_tage_to_tail(tage, cur_head); + tcd->tcd_cur_pages++; + } + } + } +} + +static void put_pages_back(struct page_collection *pc) +{ + if (!libcfs_panic_in_progress) + put_pages_back_on_all_cpus(pc); +} + +/* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that + * we have a good amount of data at all times for dumping during an LBUG, even + * if we have been steadily writing (and otherwise discarding) pages via the + * debug daemon. 
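+ * The list behaves as a ring: once tcd_cur_daemon_pages exceeds
+ * tcd_max_pages, the oldest page at the head is freed to make room for
+ * the newly appended tail (the "victim" handling below).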
+ */ +static void put_pages_on_tcd_daemon_list(struct page_collection *pc, + struct cfs_trace_cpu_data *tcd) +{ + struct cfs_trace_page *tage; + struct cfs_trace_page *tmp; + + list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) { + __LASSERT_TAGE_INVARIANT(tage); + + if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type) + continue; + + cfs_tage_to_tail(tage, &tcd->tcd_daemon_pages); + tcd->tcd_cur_daemon_pages++; + + if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) { + struct cfs_trace_page *victim; + + __LASSERT(!list_empty(&tcd->tcd_daemon_pages)); + victim = cfs_tage_from_list(tcd->tcd_daemon_pages.next); + + __LASSERT_TAGE_INVARIANT(victim); + + list_del(&victim->linkage); + cfs_tage_free(victim); + tcd->tcd_cur_daemon_pages--; + } + } +} + +static void put_pages_on_daemon_list(struct page_collection *pc) +{ + struct cfs_trace_cpu_data *tcd; + int i, cpu; + + for_each_possible_cpu(cpu) { + cfs_tcd_for_each_type_lock(tcd, i, cpu) + put_pages_on_tcd_daemon_list(pc, tcd); + } +} + +void cfs_trace_debug_print(void) +{ + struct page_collection pc; + struct cfs_trace_page *tage; + struct cfs_trace_page *tmp; + + pc.pc_want_daemon_pages = 1; + collect_pages(&pc); + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { + char *p, *file, *fn; + struct page *page; + + __LASSERT_TAGE_INVARIANT(tage); + + page = tage->page; + p = page_address(page); + while (p < ((char *)page_address(page) + tage->used)) { + struct ptldebug_header *hdr; + int len; + + hdr = (void *)p; + p += sizeof(*hdr); + file = p; + p += strlen(file) + 1; + fn = p; + p += strlen(fn) + 1; + len = hdr->ph_len - (int)(p - (char *)hdr); + + cfs_print_to_console(hdr, D_EMERG, p, len, file, fn); + + p += len; + } + + list_del(&tage->linkage); + cfs_tage_free(tage); + } +} + +int cfs_tracefile_dump_all_pages(char *filename) +{ + struct page_collection pc; + struct file *filp; + struct cfs_trace_page *tage; + struct cfs_trace_page *tmp; + char *buf; + int rc; + + DECL_MMSPACE; + + cfs_tracefile_write_lock(); + + filp = filp_open(filename, O_CREAT | O_EXCL | O_WRONLY | O_LARGEFILE, + 0600); + if (IS_ERR(filp)) { + rc = PTR_ERR(filp); + filp = NULL; + pr_err("LustreError: can't open %s for dump: rc %d\n", + filename, rc); + goto out; + } + + pc.pc_want_daemon_pages = 1; + collect_pages(&pc); + if (list_empty(&pc.pc_pages)) { + rc = 0; + goto close; + } + + /* ok, for now, just write the pages. 
in the future we'll be building + * iobufs with the pages and calling generic_direct_IO + */ + MMSPACE_OPEN; + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { + __LASSERT_TAGE_INVARIANT(tage); + + buf = kmap(tage->page); + rc = vfs_write(filp, (__force const char __user *)buf, + tage->used, &filp->f_pos); + kunmap(tage->page); + + if (rc != (int)tage->used) { + printk(KERN_WARNING "wanted to write %u but wrote %d\n", + tage->used, rc); + put_pages_back(&pc); + __LASSERT(list_empty(&pc.pc_pages)); + break; + } + list_del(&tage->linkage); + cfs_tage_free(tage); + } + MMSPACE_CLOSE; + rc = vfs_fsync(filp, 1); + if (rc) + pr_err("sync returns %d\n", rc); +close: + filp_close(filp, NULL); +out: + cfs_tracefile_write_unlock(); + return rc; +} + +void cfs_trace_flush_pages(void) +{ + struct page_collection pc; + struct cfs_trace_page *tage; + struct cfs_trace_page *tmp; + + pc.pc_want_daemon_pages = 1; + collect_pages(&pc); + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { + __LASSERT_TAGE_INVARIANT(tage); + + list_del(&tage->linkage); + cfs_tage_free(tage); + } +} + +int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob, + const char __user *usr_buffer, int usr_buffer_nob) +{ + int nob; + + if (usr_buffer_nob > knl_buffer_nob) + return -EOVERFLOW; + + if (copy_from_user((void *)knl_buffer, + usr_buffer, usr_buffer_nob)) + return -EFAULT; + + nob = strnlen(knl_buffer, usr_buffer_nob); + while (nob-- >= 0) /* strip trailing whitespace */ + if (!isspace(knl_buffer[nob])) + break; + + if (nob < 0) /* empty string */ + return -EINVAL; + + if (nob == knl_buffer_nob) /* no space to terminate */ + return -EOVERFLOW; + + knl_buffer[nob + 1] = 0; /* terminate */ + return 0; +} +EXPORT_SYMBOL(cfs_trace_copyin_string); + +int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob, + const char *knl_buffer, char *append) +{ + /* + * NB if 'append' != NULL, it's a single character to append to the + * copied out string - usually "\n" or "" (i.e. 
a terminating zero byte) + */ + int nob = strlen(knl_buffer); + + if (nob > usr_buffer_nob) + nob = usr_buffer_nob; + + if (copy_to_user(usr_buffer, knl_buffer, nob)) + return -EFAULT; + + if (append && nob < usr_buffer_nob) { + if (copy_to_user(usr_buffer + nob, append, 1)) + return -EFAULT; + + nob++; + } + + return nob; +} +EXPORT_SYMBOL(cfs_trace_copyout_string); + +int cfs_trace_allocate_string_buffer(char **str, int nob) +{ + if (nob > 2 * PAGE_SIZE) /* string must be "sensible" */ + return -EINVAL; + + *str = kmalloc(nob, GFP_KERNEL | __GFP_ZERO); + if (!*str) + return -ENOMEM; + + return 0; +} + +int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob) +{ + char *str; + int rc; + + rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1); + if (rc != 0) + return rc; + + rc = cfs_trace_copyin_string(str, usr_str_nob + 1, + usr_str, usr_str_nob); + if (rc != 0) + goto out; + + if (str[0] != '/') { + rc = -EINVAL; + goto out; + } + rc = cfs_tracefile_dump_all_pages(str); +out: + kfree(str); + return rc; +} + +int cfs_trace_daemon_command(char *str) +{ + int rc = 0; + + cfs_tracefile_write_lock(); + + if (strcmp(str, "stop") == 0) { + cfs_tracefile_write_unlock(); + cfs_trace_stop_thread(); + cfs_tracefile_write_lock(); + memset(cfs_tracefile, 0, sizeof(cfs_tracefile)); + + } else if (strncmp(str, "size=", 5) == 0) { + unsigned long tmp; + + rc = kstrtoul(str + 5, 10, &tmp); + if (!rc) { + if (tmp < 10 || tmp > 20480) + cfs_tracefile_size = CFS_TRACEFILE_SIZE; + else + cfs_tracefile_size = tmp << 20; + } + } else if (strlen(str) >= sizeof(cfs_tracefile)) { + rc = -ENAMETOOLONG; + } else if (str[0] != '/') { + rc = -EINVAL; + } else { + strcpy(cfs_tracefile, str); + + printk(KERN_INFO + "Lustre: debug daemon will attempt to start writing to %s (%lukB max)\n", + cfs_tracefile, + (long)(cfs_tracefile_size >> 10)); + + cfs_trace_start_thread(); + } + + cfs_tracefile_write_unlock(); + return rc; +} + +int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob) +{ + char *str; + int rc; + + rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1); + if (rc != 0) + return rc; + + rc = cfs_trace_copyin_string(str, usr_str_nob + 1, + usr_str, usr_str_nob); + if (rc == 0) + rc = cfs_trace_daemon_command(str); + + kfree(str); + return rc; +} + +int cfs_trace_set_debug_mb(int mb) +{ + int i; + int j; + int pages; + int limit = cfs_trace_max_debug_mb(); + struct cfs_trace_cpu_data *tcd; + + if (mb < num_possible_cpus()) { + printk(KERN_WARNING + "Lustre: %d MB is too small for debug buffer size, setting it to %d MB.\n", + mb, num_possible_cpus()); + mb = num_possible_cpus(); + } + + if (mb > limit) { + printk(KERN_WARNING + "Lustre: %d MB is too large for debug buffer size, setting it to %d MB.\n", + mb, limit); + mb = limit; + } + + mb /= num_possible_cpus(); + pages = mb << (20 - PAGE_SHIFT); + + cfs_tracefile_write_lock(); + + cfs_tcd_for_each(tcd, i, j) + tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100; + + cfs_tracefile_write_unlock(); + + return 0; +} + +int cfs_trace_get_debug_mb(void) +{ + int i; + int j; + struct cfs_trace_cpu_data *tcd; + int total_pages = 0; + + cfs_tracefile_read_lock(); + + cfs_tcd_for_each(tcd, i, j) + total_pages += tcd->tcd_max_pages; + + cfs_tracefile_read_unlock(); + + return (total_pages >> (20 - PAGE_SHIFT)) + 1; +} + +static int tracefiled(void *arg) +{ + struct page_collection pc; + struct tracefiled_ctl *tctl = arg; + struct cfs_trace_page *tage; + struct cfs_trace_page *tmp; + struct file *filp; + char 
*buf; + int last_loop = 0; + int rc; + + DECL_MMSPACE; + + /* we're started late enough that we pick up init's fs context */ + /* this is so broken in uml? what on earth is going on? */ + + complete(&tctl->tctl_start); + + while (1) { + wait_queue_t __wait; + + pc.pc_want_daemon_pages = 0; + collect_pages(&pc); + if (list_empty(&pc.pc_pages)) + goto end_loop; + + filp = NULL; + cfs_tracefile_read_lock(); + if (cfs_tracefile[0] != 0) { + filp = filp_open(cfs_tracefile, + O_CREAT | O_RDWR | O_LARGEFILE, + 0600); + if (IS_ERR(filp)) { + rc = PTR_ERR(filp); + filp = NULL; + printk(KERN_WARNING "couldn't open %s: %d\n", + cfs_tracefile, rc); + } + } + cfs_tracefile_read_unlock(); + if (!filp) { + put_pages_on_daemon_list(&pc); + __LASSERT(list_empty(&pc.pc_pages)); + goto end_loop; + } + + MMSPACE_OPEN; + + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { + static loff_t f_pos; + + __LASSERT_TAGE_INVARIANT(tage); + + if (f_pos >= (off_t)cfs_tracefile_size) + f_pos = 0; + else if (f_pos > i_size_read(file_inode(filp))) + f_pos = i_size_read(file_inode(filp)); + + buf = kmap(tage->page); + rc = vfs_write(filp, (__force const char __user *)buf, + tage->used, &f_pos); + kunmap(tage->page); + + if (rc != (int)tage->used) { + printk(KERN_WARNING "wanted to write %u but wrote %d\n", + tage->used, rc); + put_pages_back(&pc); + __LASSERT(list_empty(&pc.pc_pages)); + break; + } + } + MMSPACE_CLOSE; + + filp_close(filp, NULL); + put_pages_on_daemon_list(&pc); + if (!list_empty(&pc.pc_pages)) { + int i; + + printk(KERN_ALERT "Lustre: trace pages aren't empty\n"); + pr_err("total cpus(%d): ", num_possible_cpus()); + for (i = 0; i < num_possible_cpus(); i++) + if (cpu_online(i)) + pr_cont("%d(on) ", i); + else + pr_cont("%d(off) ", i); + pr_cont("\n"); + + i = 0; + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, + linkage) + pr_err("page %d belongs to cpu %d\n", + ++i, tage->cpu); + pr_err("There are %d pages unwritten\n", i); + } + __LASSERT(list_empty(&pc.pc_pages)); +end_loop: + if (atomic_read(&tctl->tctl_shutdown)) { + if (last_loop == 0) { + last_loop = 1; + continue; + } else { + break; + } + } + init_waitqueue_entry(&__wait, current); + add_wait_queue(&tctl->tctl_waitq, &__wait); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(cfs_time_seconds(1)); + remove_wait_queue(&tctl->tctl_waitq, &__wait); + } + complete(&tctl->tctl_stop); + return 0; +} + +int cfs_trace_start_thread(void) +{ + struct tracefiled_ctl *tctl = &trace_tctl; + struct task_struct *task; + int rc = 0; + + mutex_lock(&cfs_trace_thread_mutex); + if (thread_running) + goto out; + + init_completion(&tctl->tctl_start); + init_completion(&tctl->tctl_stop); + init_waitqueue_head(&tctl->tctl_waitq); + atomic_set(&tctl->tctl_shutdown, 0); + + task = kthread_run(tracefiled, tctl, "ktracefiled"); + if (IS_ERR(task)) { + rc = PTR_ERR(task); + goto out; + } + + wait_for_completion(&tctl->tctl_start); + thread_running = 1; +out: + mutex_unlock(&cfs_trace_thread_mutex); + return rc; +} + +void cfs_trace_stop_thread(void) +{ + struct tracefiled_ctl *tctl = &trace_tctl; + + mutex_lock(&cfs_trace_thread_mutex); + if (thread_running) { + printk(KERN_INFO + "Lustre: shutting down debug daemon thread...\n"); + atomic_set(&tctl->tctl_shutdown, 1); + wait_for_completion(&tctl->tctl_stop); + thread_running = 0; + } + mutex_unlock(&cfs_trace_thread_mutex); +} + +int cfs_tracefile_init(int max_pages) +{ + struct cfs_trace_cpu_data *tcd; + int i; + int j; + int rc; + int factor; + + rc = cfs_tracefile_init_arch(); + if (rc != 0) + 
return rc;
+
+	cfs_tcd_for_each(tcd, i, j) {
+		/* tcd_pages_factor is initialized in cfs_tracefile_init_arch(). */
+		factor = tcd->tcd_pages_factor;
+		INIT_LIST_HEAD(&tcd->tcd_pages);
+		INIT_LIST_HEAD(&tcd->tcd_stock_pages);
+		INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
+		tcd->tcd_cur_pages = 0;
+		tcd->tcd_cur_stock_pages = 0;
+		tcd->tcd_cur_daemon_pages = 0;
+		tcd->tcd_max_pages = (max_pages * factor) / 100;
+		LASSERT(tcd->tcd_max_pages > 0);
+		tcd->tcd_shutting_down = 0;
+	}
+
+	return 0;
+}
+
+static void trace_cleanup_on_all_cpus(void)
+{
+	struct cfs_trace_cpu_data *tcd;
+	struct cfs_trace_page *tage;
+	struct cfs_trace_page *tmp;
+	int i, cpu;
+
+	for_each_possible_cpu(cpu) {
+		cfs_tcd_for_each_type_lock(tcd, i, cpu) {
+			tcd->tcd_shutting_down = 1;
+
+			list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages,
+						 linkage) {
+				__LASSERT_TAGE_INVARIANT(tage);
+
+				list_del(&tage->linkage);
+				cfs_tage_free(tage);
+			}
+
+			tcd->tcd_cur_pages = 0;
+		}
+	}
+}
+
+static void cfs_trace_cleanup(void)
+{
+	struct page_collection pc;
+
+	INIT_LIST_HEAD(&pc.pc_pages);
+
+	trace_cleanup_on_all_cpus();
+
+	cfs_tracefile_fini_arch();
+}
+
+void cfs_tracefile_exit(void)
+{
+	cfs_trace_stop_thread();
+	cfs_trace_cleanup();
+}
diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.h b/drivers/staging/lustre/lnet/libcfs/tracefile.h
new file mode 100644
index 000000000..ac84e7f4c
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/tracefile.h
@@ -0,0 +1,266 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
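+ *
+ * Rough lifecycle of this API (an illustrative sketch only; the dump
+ * path and the page budget here are made up):
+ *
+ *	cfs_tracefile_init(TCD_MAX_PAGES);
+ *	cfs_trace_start_thread();
+ *	...
+ *	cfs_tracefile_dump_all_pages("/tmp/lustre-debug.log");
+ *	cfs_tracefile_exit();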
+ */ + +#ifndef __LIBCFS_TRACEFILE_H__ +#define __LIBCFS_TRACEFILE_H__ + +#include "../../include/linux/libcfs/libcfs.h" + +enum cfs_trace_buf_type { + CFS_TCD_TYPE_PROC = 0, + CFS_TCD_TYPE_SOFTIRQ, + CFS_TCD_TYPE_IRQ, + CFS_TCD_TYPE_MAX +}; + +/* trace file lock routines */ + +#define TRACEFILE_NAME_SIZE 1024 +extern char cfs_tracefile[TRACEFILE_NAME_SIZE]; +extern long long cfs_tracefile_size; + +void libcfs_run_debug_log_upcall(char *file); + +int cfs_tracefile_init_arch(void); +void cfs_tracefile_fini_arch(void); + +void cfs_tracefile_read_lock(void); +void cfs_tracefile_read_unlock(void); +void cfs_tracefile_write_lock(void); +void cfs_tracefile_write_unlock(void); + +int cfs_tracefile_dump_all_pages(char *filename); +void cfs_trace_debug_print(void); +void cfs_trace_flush_pages(void); +int cfs_trace_start_thread(void); +void cfs_trace_stop_thread(void); +int cfs_tracefile_init(int max_pages); +void cfs_tracefile_exit(void); + +int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob, + const char __user *usr_buffer, int usr_buffer_nob); +int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob, + const char *knl_str, char *append); +int cfs_trace_allocate_string_buffer(char **str, int nob); +int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob); +int cfs_trace_daemon_command(char *str); +int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob); +int cfs_trace_set_debug_mb(int mb); +int cfs_trace_get_debug_mb(void); + +void libcfs_debug_dumplog_internal(void *arg); +void libcfs_register_panic_notifier(void); +void libcfs_unregister_panic_notifier(void); +extern int libcfs_panic_in_progress; +int cfs_trace_max_debug_mb(void); + +#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT)) +#define TCD_STOCK_PAGES (TCD_MAX_PAGES) +#define CFS_TRACEFILE_SIZE (500 << 20) + +#ifdef LUSTRE_TRACEFILE_PRIVATE + +/* + * Private declare for tracefile + */ +#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT)) +#define TCD_STOCK_PAGES (TCD_MAX_PAGES) + +#define CFS_TRACEFILE_SIZE (500 << 20) + +/* + * Size of a buffer for sprinting console messages if we can't get a page + * from system + */ +#define CFS_TRACE_CONSOLE_BUFFER_SIZE 1024 + +union cfs_trace_data_union { + struct cfs_trace_cpu_data { + /* + * Even though this structure is meant to be per-CPU, locking + * is needed because in some places the data may be accessed + * from other CPUs. This lock is directly used in trace_get_tcd + * and trace_put_tcd, which are called in libcfs_debug_vmsg2 and + * tcd_for_each_type_lock + */ + spinlock_t tcd_lock; + unsigned long tcd_lock_flags; + + /* + * pages with trace records not yet processed by tracefiled. + */ + struct list_head tcd_pages; + /* number of pages on ->tcd_pages */ + unsigned long tcd_cur_pages; + + /* + * pages with trace records already processed by + * tracefiled. These pages are kept in memory, so that some + * portion of log can be written in the event of LBUG. This + * list is maintained in LRU order. + * + * Pages are moved to ->tcd_daemon_pages by tracefiled() + * (put_pages_on_daemon_list()). LRU pages from this list are + * discarded when list grows too large. + */ + struct list_head tcd_daemon_pages; + /* number of pages on ->tcd_daemon_pages */ + unsigned long tcd_cur_daemon_pages; + + /* + * Maximal number of pages allowed on ->tcd_pages and + * ->tcd_daemon_pages each. + * Always TCD_MAX_PAGES * tcd_pages_factor / 100 in current + * implementation. 
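+		 * e.g. with 4 KiB pages, TCD_MAX_PAGES is 5 << 8 = 1280, so
+		 * a context with tcd_pages_factor = 10 is capped at 128
+		 * pages (512 KiB) of trace data.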
+ */ + unsigned long tcd_max_pages; + + /* + * preallocated pages to write trace records into. Pages from + * ->tcd_stock_pages are moved to ->tcd_pages by + * portals_debug_msg(). + * + * This list is necessary, because on some platforms it's + * impossible to perform efficient atomic page allocation in a + * non-blockable context. + * + * Such platforms fill ->tcd_stock_pages "on occasion", when + * tracing code is entered in blockable context. + * + * trace_get_tage_try() tries to get a page from + * ->tcd_stock_pages first and resorts to atomic page + * allocation only if this queue is empty. ->tcd_stock_pages + * is replenished when tracing code is entered in blocking + * context (darwin-tracefile.c:trace_get_tcd()). We try to + * maintain TCD_STOCK_PAGES (40 by default) pages in this + * queue. Atomic allocation is only required if more than + * TCD_STOCK_PAGES pagesful are consumed by trace records all + * emitted in non-blocking contexts. Which is quite unlikely. + */ + struct list_head tcd_stock_pages; + /* number of pages on ->tcd_stock_pages */ + unsigned long tcd_cur_stock_pages; + + unsigned short tcd_shutting_down; + unsigned short tcd_cpu; + unsigned short tcd_type; + /* The factors to share debug memory. */ + unsigned short tcd_pages_factor; + } tcd; + char __pad[L1_CACHE_ALIGN(sizeof(struct cfs_trace_cpu_data))]; +}; + +#define TCD_MAX_TYPES 8 +extern union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS]; + +#define cfs_tcd_for_each(tcd, i, j) \ + for (i = 0; cfs_trace_data[i]; i++) \ + for (j = 0, ((tcd) = &(*cfs_trace_data[i])[j].tcd); \ + j < num_possible_cpus(); \ + j++, (tcd) = &(*cfs_trace_data[i])[j].tcd) + +#define cfs_tcd_for_each_type_lock(tcd, i, cpu) \ + for (i = 0; cfs_trace_data[i] && \ + (tcd = &(*cfs_trace_data[i])[cpu].tcd) && \ + cfs_trace_lock_tcd(tcd, 1); cfs_trace_unlock_tcd(tcd, 1), i++) + +void cfs_set_ptldebug_header(struct ptldebug_header *header, + struct libcfs_debug_msg_data *m, + unsigned long stack); +void cfs_print_to_console(struct ptldebug_header *hdr, int mask, + const char *buf, int len, const char *file, + const char *fn); + +int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking); +void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking); + +extern char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX]; +enum cfs_trace_buf_type cfs_trace_buf_idx_get(void); + +static inline char * +cfs_trace_get_console_buffer(void) +{ + unsigned int i = get_cpu(); + unsigned int j = cfs_trace_buf_idx_get(); + + return cfs_trace_console_buffers[i][j]; +} + +static inline struct cfs_trace_cpu_data * +cfs_trace_get_tcd(void) +{ + struct cfs_trace_cpu_data *tcd = + &(*cfs_trace_data[cfs_trace_buf_idx_get()])[get_cpu()].tcd; + + cfs_trace_lock_tcd(tcd, 0); + + return tcd; +} + +static inline void cfs_trace_put_tcd(struct cfs_trace_cpu_data *tcd) +{ + cfs_trace_unlock_tcd(tcd, 0); + + put_cpu(); +} + +int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp, + struct list_head *stock); + +void cfs_trace_assertion_failed(const char *str, + struct libcfs_debug_msg_data *m); + +/* ASSERTION that is safe to use within the debug system */ +#define __LASSERT(cond) \ +do { \ + if (unlikely(!(cond))) { \ + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_EMERG, NULL); \ + cfs_trace_assertion_failed("ASSERTION("#cond") failed", \ + &msgdata); \ + } \ +} while (0) + +#define __LASSERT_TAGE_INVARIANT(tage) \ +do { \ + __LASSERT(tage); \ + __LASSERT(tage->page); \ + __LASSERT(tage->used <= PAGE_SIZE); \ + 
__LASSERT(page_count(tage->page) > 0);			\
+} while (0)
+
+#endif	/* LUSTRE_TRACEFILE_PRIVATE */
+
+#endif /* __LIBCFS_TRACEFILE_H__ */
diff --git a/drivers/staging/lustre/lnet/libcfs/workitem.c b/drivers/staging/lustre/lnet/libcfs/workitem.c
new file mode 100644
index 000000000..c72fe00dc
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/workitem.c
@@ -0,0 +1,469 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/libcfs/workitem.c
+ *
+ * Author: Isaac Huang <isaac@clusterfs.com>
+ *	   Liang Zhen <zhen.liang@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+#define CFS_WS_NAME_LEN	16
+
+struct cfs_wi_sched {
+	/* chain on global list */
+	struct list_head	ws_list;
+	/** serialised workitems */
+	spinlock_t		ws_lock;
+	/** where schedulers sleep */
+	wait_queue_head_t	ws_waitq;
+	/** concurrent workitems */
+	struct list_head	ws_runq;
+	/**
+	 * rescheduled running-workitems, a workitem can be rescheduled
+	 * while running in wi_action(), but we don't want to execute it
+	 * again until it returns from wi_action(), so we put it on ws_rerunq
+	 * while rescheduling, and move it to runq after it returns
+	 * from wi_action()
+	 */
+	struct list_head	ws_rerunq;
+	/** CPT-table for this scheduler */
+	struct cfs_cpt_table	*ws_cptab;
+	/** CPT id for affinity */
+	int			ws_cpt;
+	/** number of scheduled workitems */
+	int			ws_nscheduled;
+	/** started scheduler thread, protected by cfs_wi_data::wi_glock */
+	unsigned int		ws_nthreads:30;
+	/** shutting down, protected by cfs_wi_data::wi_glock */
+	unsigned int		ws_stopping:1;
+	/** serialize starting thread, protected by cfs_wi_data::wi_glock */
+	unsigned int		ws_starting:1;
+	/** scheduler name */
+	char			ws_name[CFS_WS_NAME_LEN];
+};
+
+static struct cfs_workitem_data {
+	/** serialize */
+	spinlock_t		wi_glock;
+	/** list of all schedulers */
+	struct list_head	wi_scheds;
+	/** WI module is initialized */
+	int			wi_init;
+	/** shutting down the whole WI module */
+	int			wi_stopping;
+} cfs_wi_data;
+
+static inline int
+cfs_wi_sched_cansleep(struct cfs_wi_sched *sched)
+{
+	spin_lock(&sched->ws_lock);
+	if (sched->ws_stopping) {
+		spin_unlock(&sched->ws_lock);
+		return 0;
+	}
+
+	if (!list_empty(&sched->ws_runq)) {
+		spin_unlock(&sched->ws_lock);
+		
return 0; + } + spin_unlock(&sched->ws_lock); + return 1; +} + +/* XXX: + * 0. it only works when called from wi->wi_action. + * 1. when it returns no one shall try to schedule the workitem. + */ +void +cfs_wi_exit(struct cfs_wi_sched *sched, cfs_workitem_t *wi) +{ + LASSERT(!in_interrupt()); /* because we use plain spinlock */ + LASSERT(!sched->ws_stopping); + + spin_lock(&sched->ws_lock); + + LASSERT(wi->wi_running); + if (wi->wi_scheduled) { /* cancel pending schedules */ + LASSERT(!list_empty(&wi->wi_list)); + list_del_init(&wi->wi_list); + + LASSERT(sched->ws_nscheduled > 0); + sched->ws_nscheduled--; + } + + LASSERT(list_empty(&wi->wi_list)); + + wi->wi_scheduled = 1; /* LBUG future schedule attempts */ + spin_unlock(&sched->ws_lock); +} +EXPORT_SYMBOL(cfs_wi_exit); + +/** + * cancel schedule request of workitem \a wi + */ +int +cfs_wi_deschedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi) +{ + int rc; + + LASSERT(!in_interrupt()); /* because we use plain spinlock */ + LASSERT(!sched->ws_stopping); + + /* + * return 0 if it's running already, otherwise return 1, which + * means the workitem will not be scheduled and will not have + * any race with wi_action. + */ + spin_lock(&sched->ws_lock); + + rc = !(wi->wi_running); + + if (wi->wi_scheduled) { /* cancel pending schedules */ + LASSERT(!list_empty(&wi->wi_list)); + list_del_init(&wi->wi_list); + + LASSERT(sched->ws_nscheduled > 0); + sched->ws_nscheduled--; + + wi->wi_scheduled = 0; + } + + LASSERT(list_empty(&wi->wi_list)); + + spin_unlock(&sched->ws_lock); + return rc; +} +EXPORT_SYMBOL(cfs_wi_deschedule); + +/* + * Workitem scheduled with (serial == 1) is strictly serialised not only with + * itself, but also with others scheduled this way. + * + * Now there's only one static serialised queue, but in the future more might + * be added, and even dynamic creation of serialised queues might be supported. + */ +void +cfs_wi_schedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi) +{ + LASSERT(!in_interrupt()); /* because we use plain spinlock */ + LASSERT(!sched->ws_stopping); + + spin_lock(&sched->ws_lock); + + if (!wi->wi_scheduled) { + LASSERT(list_empty(&wi->wi_list)); + + wi->wi_scheduled = 1; + sched->ws_nscheduled++; + if (!wi->wi_running) { + list_add_tail(&wi->wi_list, &sched->ws_runq); + wake_up(&sched->ws_waitq); + } else { + list_add(&wi->wi_list, &sched->ws_rerunq); + } + } + + LASSERT(!list_empty(&wi->wi_list)); + spin_unlock(&sched->ws_lock); +} +EXPORT_SYMBOL(cfs_wi_schedule); + +static int cfs_wi_scheduler(void *arg) +{ + struct cfs_wi_sched *sched = (struct cfs_wi_sched *)arg; + + cfs_block_allsigs(); + + /* CPT affinity scheduler? 
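if so, bind this thread below so its workitems stay on the chosen CPU partition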
*/ + if (sched->ws_cptab) + if (cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt) != 0) + CWARN("Failed to bind %s on CPT %d\n", + sched->ws_name, sched->ws_cpt); + + spin_lock(&cfs_wi_data.wi_glock); + + LASSERT(sched->ws_starting == 1); + sched->ws_starting--; + sched->ws_nthreads++; + + spin_unlock(&cfs_wi_data.wi_glock); + + spin_lock(&sched->ws_lock); + + while (!sched->ws_stopping) { + int nloops = 0; + int rc; + cfs_workitem_t *wi; + + while (!list_empty(&sched->ws_runq) && + nloops < CFS_WI_RESCHED) { + wi = list_entry(sched->ws_runq.next, cfs_workitem_t, + wi_list); + LASSERT(wi->wi_scheduled && !wi->wi_running); + + list_del_init(&wi->wi_list); + + LASSERT(sched->ws_nscheduled > 0); + sched->ws_nscheduled--; + + wi->wi_running = 1; + wi->wi_scheduled = 0; + + spin_unlock(&sched->ws_lock); + nloops++; + + rc = (*wi->wi_action) (wi); + + spin_lock(&sched->ws_lock); + if (rc != 0) /* WI should be dead, even be freed! */ + continue; + + wi->wi_running = 0; + if (list_empty(&wi->wi_list)) + continue; + + LASSERT(wi->wi_scheduled); + /* wi is rescheduled, should be on rerunq now, we + * move it to runq so it can run action now + */ + list_move_tail(&wi->wi_list, &sched->ws_runq); + } + + if (!list_empty(&sched->ws_runq)) { + spin_unlock(&sched->ws_lock); + /* don't sleep because some workitems still + * expect me to come back soon + */ + cond_resched(); + spin_lock(&sched->ws_lock); + continue; + } + + spin_unlock(&sched->ws_lock); + rc = wait_event_interruptible_exclusive(sched->ws_waitq, + !cfs_wi_sched_cansleep(sched)); + spin_lock(&sched->ws_lock); + } + + spin_unlock(&sched->ws_lock); + + spin_lock(&cfs_wi_data.wi_glock); + sched->ws_nthreads--; + spin_unlock(&cfs_wi_data.wi_glock); + + return 0; +} + +void +cfs_wi_sched_destroy(struct cfs_wi_sched *sched) +{ + int i; + + LASSERT(cfs_wi_data.wi_init); + LASSERT(!cfs_wi_data.wi_stopping); + + spin_lock(&cfs_wi_data.wi_glock); + if (sched->ws_stopping) { + CDEBUG(D_INFO, "%s is in progress of stopping\n", + sched->ws_name); + spin_unlock(&cfs_wi_data.wi_glock); + return; + } + + LASSERT(!list_empty(&sched->ws_list)); + sched->ws_stopping = 1; + + spin_unlock(&cfs_wi_data.wi_glock); + + i = 2; + wake_up_all(&sched->ws_waitq); + + spin_lock(&cfs_wi_data.wi_glock); + while (sched->ws_nthreads > 0) { + CDEBUG(is_power_of_2(++i) ? 
D_WARNING : D_NET, + "waiting for %d threads of WI sched[%s] to terminate\n", + sched->ws_nthreads, sched->ws_name); + + spin_unlock(&cfs_wi_data.wi_glock); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(cfs_time_seconds(1) / 20); + spin_lock(&cfs_wi_data.wi_glock); + } + + list_del(&sched->ws_list); + + spin_unlock(&cfs_wi_data.wi_glock); + LASSERT(sched->ws_nscheduled == 0); + + LIBCFS_FREE(sched, sizeof(*sched)); +} +EXPORT_SYMBOL(cfs_wi_sched_destroy); + +int +cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab, + int cpt, int nthrs, struct cfs_wi_sched **sched_pp) +{ + struct cfs_wi_sched *sched; + int rc; + + LASSERT(cfs_wi_data.wi_init); + LASSERT(!cfs_wi_data.wi_stopping); + LASSERT(!cptab || cpt == CFS_CPT_ANY || + (cpt >= 0 && cpt < cfs_cpt_number(cptab))); + + LIBCFS_ALLOC(sched, sizeof(*sched)); + if (!sched) + return -ENOMEM; + + if (strlen(name) > sizeof(sched->ws_name) - 1) { + LIBCFS_FREE(sched, sizeof(*sched)); + return -E2BIG; + } + strncpy(sched->ws_name, name, sizeof(sched->ws_name)); + + sched->ws_cptab = cptab; + sched->ws_cpt = cpt; + + spin_lock_init(&sched->ws_lock); + init_waitqueue_head(&sched->ws_waitq); + INIT_LIST_HEAD(&sched->ws_runq); + INIT_LIST_HEAD(&sched->ws_rerunq); + INIT_LIST_HEAD(&sched->ws_list); + + rc = 0; + while (nthrs > 0) { + char name[16]; + struct task_struct *task; + + spin_lock(&cfs_wi_data.wi_glock); + while (sched->ws_starting > 0) { + spin_unlock(&cfs_wi_data.wi_glock); + schedule(); + spin_lock(&cfs_wi_data.wi_glock); + } + + sched->ws_starting++; + spin_unlock(&cfs_wi_data.wi_glock); + + if (sched->ws_cptab && sched->ws_cpt >= 0) { + snprintf(name, sizeof(name), "%s_%02d_%02u", + sched->ws_name, sched->ws_cpt, + sched->ws_nthreads); + } else { + snprintf(name, sizeof(name), "%s_%02u", + sched->ws_name, sched->ws_nthreads); + } + + task = kthread_run(cfs_wi_scheduler, sched, "%s", name); + if (!IS_ERR(task)) { + nthrs--; + continue; + } + rc = PTR_ERR(task); + + CERROR("Failed to create thread for WI scheduler %s: %d\n", + name, rc); + + spin_lock(&cfs_wi_data.wi_glock); + + /* make up for cfs_wi_sched_destroy */ + list_add(&sched->ws_list, &cfs_wi_data.wi_scheds); + sched->ws_starting--; + + spin_unlock(&cfs_wi_data.wi_glock); + + cfs_wi_sched_destroy(sched); + return rc; + } + spin_lock(&cfs_wi_data.wi_glock); + list_add(&sched->ws_list, &cfs_wi_data.wi_scheds); + spin_unlock(&cfs_wi_data.wi_glock); + + *sched_pp = sched; + return 0; +} +EXPORT_SYMBOL(cfs_wi_sched_create); + +int +cfs_wi_startup(void) +{ + memset(&cfs_wi_data, 0, sizeof(cfs_wi_data)); + + spin_lock_init(&cfs_wi_data.wi_glock); + INIT_LIST_HEAD(&cfs_wi_data.wi_scheds); + cfs_wi_data.wi_init = 1; + + return 0; +} + +void +cfs_wi_shutdown(void) +{ + struct cfs_wi_sched *sched; + struct cfs_wi_sched *temp; + + spin_lock(&cfs_wi_data.wi_glock); + cfs_wi_data.wi_stopping = 1; + spin_unlock(&cfs_wi_data.wi_glock); + + /* nobody should contend on this list */ + list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) { + sched->ws_stopping = 1; + wake_up_all(&sched->ws_waitq); + } + + list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) { + spin_lock(&cfs_wi_data.wi_glock); + + while (sched->ws_nthreads != 0) { + spin_unlock(&cfs_wi_data.wi_glock); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(cfs_time_seconds(1) / 20); + spin_lock(&cfs_wi_data.wi_glock); + } + spin_unlock(&cfs_wi_data.wi_glock); + } + list_for_each_entry_safe(sched, temp, &cfs_wi_data.wi_scheds, ws_list) { + list_del(&sched->ws_list); + LIBCFS_FREE(sched, 
sizeof(*sched)); + } + + cfs_wi_data.wi_stopping = 0; + cfs_wi_data.wi_init = 0; +} diff --git a/drivers/staging/lustre/lnet/lnet/Makefile b/drivers/staging/lustre/lnet/lnet/Makefile index e276fe2bf..4c81fa194 100644 --- a/drivers/staging/lustre/lnet/lnet/Makefile +++ b/drivers/staging/lustre/lnet/lnet/Makefile @@ -1,6 +1,6 @@ obj-$(CONFIG_LNET) += lnet.o -lnet-y := api-ni.o config.o nidstrings.o \ +lnet-y := api-ni.o config.o nidstrings.o net_fault.o \ lib-me.o lib-msg.o lib-eq.o lib-md.o lib-ptl.o \ lib-socket.o lib-move.o module.o lo.o \ router.o router_proc.o acceptor.o peer.o diff --git a/drivers/staging/lustre/lnet/lnet/acceptor.c b/drivers/staging/lustre/lnet/lnet/acceptor.c index fed57d900..1452bb3ad 100644 --- a/drivers/staging/lustre/lnet/lnet/acceptor.c +++ b/drivers/staging/lustre/lnet/lnet/acceptor.c @@ -36,6 +36,7 @@ #define DEBUG_SUBSYSTEM S_LNET #include <linux/completion.h> +#include <net/sock.h> #include "../../include/linux/lnet/lib-lnet.h" static int accept_port = 988; @@ -46,7 +47,9 @@ static struct { int pta_shutdown; struct socket *pta_sock; struct completion pta_signal; -} lnet_acceptor_state; +} lnet_acceptor_state = { + .pta_shutdown = 1 +}; int lnet_acceptor_port(void) @@ -78,9 +81,11 @@ static char *accept_type; static int lnet_acceptor_get_tunables(void) { - /* Userland acceptor uses 'accept_type' instead of 'accept', due to + /* + * Userland acceptor uses 'accept_type' instead of 'accept', due to * conflict with 'accept(2)', but kernel acceptor still uses 'accept' - * for compatibility. Hence the trick. */ + * for compatibility. Hence the trick. + */ accept_type = accept; return 0; } @@ -140,7 +145,7 @@ EXPORT_SYMBOL(lnet_connect_console_error); int lnet_connect(struct socket **sockp, lnet_nid_t peer_nid, - __u32 local_ip, __u32 peer_ip, int peer_port) + __u32 local_ip, __u32 peer_ip, int peer_port) { lnet_acceptor_connreq_t cr; struct socket *sock; @@ -157,7 +162,7 @@ lnet_connect(struct socket **sockp, lnet_nid_t peer_nid, rc = lnet_sock_connect(&sock, &fatal, local_ip, port, peer_ip, peer_port); - if (rc != 0) { + if (rc) { if (fatal) goto failed; continue; @@ -169,14 +174,14 @@ lnet_connect(struct socket **sockp, lnet_nid_t peer_nid, cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION; cr.acr_nid = peer_nid; - if (the_lnet.ln_testprotocompat != 0) { + if (the_lnet.ln_testprotocompat) { /* single-shot proto check */ lnet_net_lock(LNET_LOCK_EX); - if ((the_lnet.ln_testprotocompat & 4) != 0) { + if (the_lnet.ln_testprotocompat & 4) { cr.acr_version++; the_lnet.ln_testprotocompat &= ~4; } - if ((the_lnet.ln_testprotocompat & 8) != 0) { + if (the_lnet.ln_testprotocompat & 8) { cr.acr_magic = LNET_PROTO_MAGIC; the_lnet.ln_testprotocompat &= ~8; } @@ -184,7 +189,7 @@ lnet_connect(struct socket **sockp, lnet_nid_t peer_nid, } rc = lnet_sock_write(sock, &cr, sizeof(cr), accept_timeout); - if (rc != 0) + if (rc) goto failed_sock; *sockp = sock; @@ -202,8 +207,6 @@ lnet_connect(struct socket **sockp, lnet_nid_t peer_nid, } EXPORT_SYMBOL(lnet_connect); -/* Below is the code common for both kernel and MT user-space */ - static int lnet_accept(struct socket *sock, __u32 magic) { @@ -218,23 +221,23 @@ lnet_accept(struct socket *sock, __u32 magic) LASSERT(sizeof(cr) <= 16); /* not too big for the stack */ rc = lnet_sock_getaddr(sock, 1, &peer_ip, &peer_port); - LASSERT(rc == 0); /* we succeeded before */ + LASSERT(!rc); /* we succeeded before */ if (!lnet_accept_magic(magic, LNET_PROTO_ACCEPTOR_MAGIC)) { - if (lnet_accept_magic(magic, LNET_PROTO_MAGIC)) { - /* future version 
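Note the new designated initializer for lnet_acceptor_state below: pta_shutdown starts at 1, so the flag doubles as a "not running" marker. That is what lets the reworked lnet_acceptor_start() return early when the acceptor is already up, and lets lnet_acceptor_stop() be a safe no-op before any start. A tiny model of the idea (names are illustrative):

static struct { int shutdown; } state = { .shutdown = 1 };

static int acceptor_start(void)
{
	if (!state.shutdown)    /* already running: succeed, do nothing */
		return 0;
	state.shutdown = 0;     /* mark running */
	/* ... spawn the acceptor thread, start listening ... */
	return 0;
}

static void acceptor_stop(void)
{
	if (state.shutdown)     /* never started, or already stopped */
		return;
	state.shutdown = 1;
	/* ... wake the thread and wait for it to exit ... */
}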
compatibility! + /* + * future version compatibility! * When LNET unifies protocols over all LNDs, the first - * thing sent will be a version query. I send back - * LNET_PROTO_ACCEPTOR_MAGIC to tell her I'm "old" */ - + * thing sent will be a version query. I send back + * LNET_PROTO_ACCEPTOR_MAGIC to tell her I'm "old" + */ memset(&cr, 0, sizeof(cr)); cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC; cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION; rc = lnet_sock_write(sock, &cr, sizeof(cr), accept_timeout); - if (rc != 0) + if (rc) CERROR("Error sending magic+version in response to LNET magic from %pI4h: %d\n", &peer_ip, rc); return -EPROTO; @@ -254,9 +257,9 @@ lnet_accept(struct socket *sock, __u32 magic) rc = lnet_sock_read(sock, &cr.acr_version, sizeof(cr.acr_version), accept_timeout); - if (rc != 0) { + if (rc) { CERROR("Error %d reading connection request version from %pI4h\n", - rc, &peer_ip); + rc, &peer_ip); return -EIO; } @@ -264,10 +267,12 @@ lnet_accept(struct socket *sock, __u32 magic) __swab32s(&cr.acr_version); if (cr.acr_version != LNET_PROTO_ACCEPTOR_VERSION) { - /* future version compatibility! + /* + * future version compatibility! * An acceptor-specific protocol rev will first send a version * query. I send back my current version to tell her I'm - * "old". */ + * "old". + */ int peer_version = cr.acr_version; memset(&cr, 0, sizeof(cr)); @@ -275,7 +280,7 @@ lnet_accept(struct socket *sock, __u32 magic) cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION; rc = lnet_sock_write(sock, &cr, sizeof(cr), accept_timeout); - if (rc != 0) + if (rc) CERROR("Error sending magic+version in response to version %d from %pI4h: %d\n", peer_version, &peer_ip, rc); return -EPROTO; @@ -285,9 +290,9 @@ lnet_accept(struct socket *sock, __u32 magic) sizeof(cr) - offsetof(lnet_acceptor_connreq_t, acr_nid), accept_timeout); - if (rc != 0) { + if (rc) { CERROR("Error %d reading connection request from %pI4h\n", - rc, &peer_ip); + rc, &peer_ip); return -EIO; } @@ -295,20 +300,20 @@ lnet_accept(struct socket *sock, __u32 magic) __swab64s(&cr.acr_nid); ni = lnet_net2ni(LNET_NIDNET(cr.acr_nid)); - if (ni == NULL || /* no matching net */ + if (!ni || /* no matching net */ ni->ni_nid != cr.acr_nid) { /* right NET, wrong NID! 
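The acceptor's version negotiation follows one rule: whenever the peer's magic or version is not understood, reply with our own magic and version and fail with -EPROTO, so a newer peer can detect an "old" listener and downgrade. The real code additionally recognizes byte-swapped magics and swabs the request fields; the sketch below keeps only the core fallback (the constants are stand-ins):

#include <errno.h>
#include <stdint.h>
#include <string.h>

#define MY_MAGIC   0x0be91b92u  /* stand-in for LNET_PROTO_ACCEPTOR_MAGIC */
#define MY_VERSION 1u           /* stand-in for the acceptor version */

struct connreq { uint32_t magic, version; };

static int check_handshake(const struct connreq *in, struct connreq *reply)
{
	if (in->magic != MY_MAGIC || in->version != MY_VERSION) {
		memset(reply, 0, sizeof(*reply));
		reply->magic = MY_MAGIC;    /* tell the peer what we speak */
		reply->version = MY_VERSION;
		return -EPROTO;             /* caller writes *reply back */
	}
	return 0;
}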
*/ - if (ni != NULL) + if (ni) lnet_ni_decref(ni); LCONSOLE_ERROR_MSG(0x120, "Refusing connection from %pI4h for %s: No matching NI\n", &peer_ip, libcfs_nid2str(cr.acr_nid)); return -EPERM; } - if (ni->ni_lnd->lnd_accept == NULL) { + if (!ni->ni_lnd->lnd_accept) { /* This catches a request for the loopback LND */ lnet_ni_decref(ni); LCONSOLE_ERROR_MSG(0x121, "Refusing connection from %pI4h for %s: NI doesn not accept IP connections\n", - &peer_ip, libcfs_nid2str(cr.acr_nid)); + &peer_ip, libcfs_nid2str(cr.acr_nid)); return -EPERM; } @@ -331,13 +336,13 @@ lnet_acceptor(void *arg) int peer_port; int secure = (int)((long_ptr_t)arg); - LASSERT(lnet_acceptor_state.pta_sock == NULL); + LASSERT(!lnet_acceptor_state.pta_sock); cfs_block_allsigs(); rc = lnet_sock_listen(&lnet_acceptor_state.pta_sock, 0, accept_port, accept_backlog); - if (rc != 0) { + if (rc) { if (rc == -EADDRINUSE) LCONSOLE_ERROR_MSG(0x122, "Can't start acceptor on port %d: port already in use\n", accept_port); @@ -354,13 +359,12 @@ lnet_acceptor(void *arg) lnet_acceptor_state.pta_shutdown = rc; complete(&lnet_acceptor_state.pta_signal); - if (rc != 0) + if (rc) return rc; while (!lnet_acceptor_state.pta_shutdown) { - rc = lnet_sock_accept(&newsock, lnet_acceptor_state.pta_sock); - if (rc != 0) { + if (rc) { if (rc != -EAGAIN) { CWARN("Accept error %d: pausing...\n", rc); set_current_state(TASK_UNINTERRUPTIBLE); @@ -376,7 +380,7 @@ lnet_acceptor(void *arg) } rc = lnet_sock_getaddr(newsock, 1, &peer_ip, &peer_port); - if (rc != 0) { + if (rc) { CERROR("Can't determine new connection's address\n"); goto failed; } @@ -389,14 +393,14 @@ lnet_acceptor(void *arg) rc = lnet_sock_read(newsock, &magic, sizeof(magic), accept_timeout); - if (rc != 0) { + if (rc) { CERROR("Error %d reading connection request from %pI4h\n", - rc, &peer_ip); + rc, &peer_ip); goto failed; } rc = lnet_accept(newsock, magic); - if (rc != 0) + if (rc) goto failed; continue; @@ -436,14 +440,19 @@ accept2secure(const char *acc, long *sec) int lnet_acceptor_start(void) { + struct task_struct *task; int rc; long rc2; long secure; - LASSERT(lnet_acceptor_state.pta_sock == NULL); + /* if acceptor is already running return immediately */ + if (!lnet_acceptor_state.pta_shutdown) + return 0; + + LASSERT(!lnet_acceptor_state.pta_sock); rc = lnet_acceptor_get_tunables(); - if (rc != 0) + if (rc) return rc; init_completion(&lnet_acceptor_state.pta_signal); @@ -451,13 +460,13 @@ lnet_acceptor_start(void) if (rc <= 0) return rc; - if (lnet_count_acceptor_nis() == 0) /* not required */ + if (!lnet_count_acceptor_nis()) /* not required */ return 0; - rc2 = PTR_ERR(kthread_run(lnet_acceptor, - (void *)(ulong_ptr_t)secure, - "acceptor_%03ld", secure)); - if (IS_ERR_VALUE(rc2)) { + task = kthread_run(lnet_acceptor, (void *)(ulong_ptr_t)secure, + "acceptor_%03ld", secure); + if (IS_ERR(task)) { + rc2 = PTR_ERR(task); CERROR("Can't start acceptor thread: %ld\n", rc2); return -ESRCH; @@ -468,11 +477,11 @@ lnet_acceptor_start(void) if (!lnet_acceptor_state.pta_shutdown) { /* started OK */ - LASSERT(lnet_acceptor_state.pta_sock != NULL); + LASSERT(lnet_acceptor_state.pta_sock); return 0; } - LASSERT(lnet_acceptor_state.pta_sock == NULL); + LASSERT(!lnet_acceptor_state.pta_sock); return -ENETDOWN; } @@ -480,11 +489,17 @@ lnet_acceptor_start(void) void lnet_acceptor_stop(void) { - if (lnet_acceptor_state.pta_sock == NULL) /* not running */ + struct sock *sk; + + if (lnet_acceptor_state.pta_shutdown) /* not running */ return; lnet_acceptor_state.pta_shutdown = 1; - 
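The lnet_acceptor_start() change above replaces PTR_ERR() applied directly to the raw kthread_run() result with keeping the struct task_struct pointer and testing IS_ERR() on it. That is the standard kernel idiom: test the pointer first, and decode the errno only once it really is an error. Shape of the fixed pattern (kernel-style sketch, helper name mine):

#include <linux/kthread.h>
#include <linux/err.h>

static int start_acceptor(int (*fn)(void *), void *arg)
{
	struct task_struct *task;

	task = kthread_run(fn, arg, "acceptor_%03d", 0);
	if (IS_ERR(task))             /* test the pointer itself ... */
		return PTR_ERR(task); /* ... then decode the errno */
	return 0;
}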
wake_up_all(sk_sleep(lnet_acceptor_state.pta_sock->sk)); + + sk = lnet_acceptor_state.pta_sock->sk; + + /* awake any sleepers using safe method */ + sk->sk_state_change(sk); /* block until acceptor signals exit */ wait_for_completion(&lnet_acceptor_state.pta_signal); diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c index 362282fa0..876475554 100644 --- a/drivers/staging/lustre/lnet/lnet/api-ni.c +++ b/drivers/staging/lustre/lnet/lnet/api-ni.c @@ -39,6 +39,7 @@ #include <linux/ktime.h> #include "../../include/linux/lnet/lib-lnet.h" +#include "../../include/linux/lnet/lib-dlc.h" #define D_LNI D_CONSOLE @@ -61,6 +62,9 @@ static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT; module_param(rnet_htable_size, int, 0444); MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table"); +static int lnet_ping(lnet_process_id_t id, int timeout_ms, + lnet_process_id_t __user *ids, int n_ids); + static char * lnet_get_routes(void) { @@ -73,17 +77,17 @@ lnet_get_networks(void) char *nets; int rc; - if (*networks != 0 && *ip2nets != 0) { + if (*networks && *ip2nets) { LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or 'ip2nets' but not both at once\n"); return NULL; } - if (*ip2nets != 0) { + if (*ip2nets) { rc = lnet_parse_ip2nets(&nets, ip2nets); - return (rc == 0) ? nets : NULL; + return !rc ? nets : NULL; } - if (*networks != 0) + if (*networks) return networks; return "tcp"; @@ -94,6 +98,7 @@ lnet_init_locks(void) { spin_lock_init(&the_lnet.ln_eq_wait_lock); init_waitqueue_head(&the_lnet.ln_eq_waitq); + init_waitqueue_head(&the_lnet.ln_rc_waitq); mutex_init(&the_lnet.ln_lnd_mutex); mutex_init(&the_lnet.ln_api_mutex); } @@ -104,10 +109,10 @@ lnet_create_remote_nets_table(void) int i; struct list_head *hash; - LASSERT(the_lnet.ln_remote_nets_hash == NULL); + LASSERT(!the_lnet.ln_remote_nets_hash); LASSERT(the_lnet.ln_remote_nets_hbits > 0); LIBCFS_ALLOC(hash, LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash)); - if (hash == NULL) { + if (!hash) { CERROR("Failed to create remote nets hash table\n"); return -ENOMEM; } @@ -123,7 +128,7 @@ lnet_destroy_remote_nets_table(void) { int i; - if (the_lnet.ln_remote_nets_hash == NULL) + if (!the_lnet.ln_remote_nets_hash) return; for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) @@ -138,12 +143,12 @@ lnet_destroy_remote_nets_table(void) static void lnet_destroy_locks(void) { - if (the_lnet.ln_res_lock != NULL) { + if (the_lnet.ln_res_lock) { cfs_percpt_lock_free(the_lnet.ln_res_lock); the_lnet.ln_res_lock = NULL; } - if (the_lnet.ln_net_lock != NULL) { + if (the_lnet.ln_net_lock) { cfs_percpt_lock_free(the_lnet.ln_net_lock); the_lnet.ln_net_lock = NULL; } @@ -155,11 +160,11 @@ lnet_create_locks(void) lnet_init_locks(); the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table()); - if (the_lnet.ln_res_lock == NULL) + if (!the_lnet.ln_res_lock) goto failed; the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table()); - if (the_lnet.ln_net_lock == NULL) + if (!the_lnet.ln_net_lock) goto failed; return 0; @@ -171,10 +176,12 @@ lnet_create_locks(void) static void lnet_assert_wire_constants(void) { - /* Wire protocol assertions generated by 'wirecheck' + /* + * Wire protocol assertions generated by 'wirecheck' * running on Linux robert.bartonsoftware.com 2.6.8-1.521 * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux - * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */ + * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) + */ /* Constants... 
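Stopping the acceptor no longer pokes sk_sleep() directly; it flips pta_shutdown and then invokes the socket's own sk_state_change() callback, which wakes whatever wait queue the socket layer actually registered. Kernel-style sketch of the wake-up (the helper name is mine):

#include <linux/net.h>
#include <net/sock.h>

static void kick_listener(struct socket *listener, int *shutdown_flag)
{
	struct sock *sk = listener->sk;

	*shutdown_flag = 1;       /* the acceptor loop re-tests this flag */
	sk->sk_state_change(sk);  /* wake whoever sleeps inside accept */
}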
*/ CLASSERT(LNET_PROTO_TCP_MAGIC == 0xeebc0ded); @@ -284,9 +291,8 @@ lnet_register_lnd(lnd_t *lnd) { mutex_lock(&the_lnet.ln_lnd_mutex); - LASSERT(the_lnet.ln_init); LASSERT(libcfs_isknown_lnd(lnd->lnd_type)); - LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == NULL); + LASSERT(!lnet_find_lnd_by_type(lnd->lnd_type)); list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds); lnd->lnd_refcount = 0; @@ -302,9 +308,8 @@ lnet_unregister_lnd(lnd_t *lnd) { mutex_lock(&the_lnet.ln_lnd_mutex); - LASSERT(the_lnet.ln_init); LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd); - LASSERT(lnd->lnd_refcount == 0); + LASSERT(!lnd->lnd_refcount); list_del(&lnd->lnd_list); CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type)); @@ -335,7 +340,6 @@ lnet_counters_get(lnet_counters_t *counters) counters->recv_length += ctr->recv_length; counters->route_length += ctr->route_length; counters->drop_length += ctr->drop_length; - } lnet_net_unlock(LNET_LOCK_EX); } @@ -375,7 +379,7 @@ lnet_res_container_cleanup(struct lnet_res_container *rec) { int count = 0; - if (rec->rec_type == 0) /* not set yet, it's uninitialized */ + if (!rec->rec_type) /* not set yet, it's uninitialized */ return; while (!list_empty(&rec->rec_active)) { @@ -395,14 +399,16 @@ lnet_res_container_cleanup(struct lnet_res_container *rec) } if (count > 0) { - /* Found alive MD/ME/EQ, user really should unlink/free + /* + * Found alive MD/ME/EQ, user really should unlink/free * all of them before finalize LNet, but if someone didn't, - * we have to recycle garbage for him */ + * we have to recycle garbage for him + */ CERROR("%d active elements on exit of %s container\n", count, lnet_res_type2str(rec->rec_type)); } - if (rec->rec_lh_hash != NULL) { + if (rec->rec_lh_hash) { LIBCFS_FREE(rec->rec_lh_hash, LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0])); rec->rec_lh_hash = NULL; @@ -417,7 +423,7 @@ lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type) int rc = 0; int i; - LASSERT(rec->rec_type == 0); + LASSERT(!rec->rec_type); rec->rec_type = type; INIT_LIST_HEAD(&rec->rec_active); @@ -426,7 +432,7 @@ lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type) /* Arbitrary choice of hash table size */ LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt, LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0])); - if (rec->rec_lh_hash == NULL) { + if (!rec->rec_lh_hash) { rc = -ENOMEM; goto out; } @@ -464,7 +470,7 @@ lnet_res_containers_create(int type) int i; recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec)); - if (recs == NULL) { + if (!recs) { CERROR("Failed to allocate %s resource containers\n", lnet_res_type2str(type)); return NULL; @@ -472,7 +478,7 @@ lnet_res_containers_create(int type) cfs_percpt_for_each(rec, i, recs) { rc = lnet_res_container_setup(rec, i, type); - if (rc != 0) { + if (rc) { lnet_res_containers_destroy(recs); return NULL; } @@ -518,7 +524,7 @@ lnet_res_lh_initialize(struct lnet_res_container *rec, lnet_libhandle_t *lh) list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]); } -int lnet_unprepare(void); +static int lnet_unprepare(void); static int lnet_prepare(lnet_pid_t requested_pid) @@ -527,11 +533,16 @@ lnet_prepare(lnet_pid_t requested_pid) struct lnet_res_container **recs; int rc = 0; - LASSERT(the_lnet.ln_refcount == 0); + if (requested_pid == LNET_PID_ANY) { + /* Don't instantiate LNET just for me */ + return -ENETDOWN; + } + + LASSERT(!the_lnet.ln_refcount); the_lnet.ln_routing = 0; - LASSERT((requested_pid & LNET_PID_USERFLAG) == 0); + LASSERT(!(requested_pid & 
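lnet_counters_get() above illustrates the per-CPT counter scheme: each CPU partition increments its own lnet_counters_t with no cross-CPU contention, and totals are computed only when somebody reads them. A user-space model of the read side (structure and names are illustrative):

#define NCPT 4                          /* stand-in partition count */

struct counters { unsigned long msgs_alloc, send_count; };

static struct counters percpt[NCPT];    /* one slot per partition */

/* Writers touch only their own slot; readers pay for the summation. */
static void counters_get(struct counters *sum)
{
	int i;

	sum->msgs_alloc = 0;
	sum->send_count = 0;
	for (i = 0; i < NCPT; i++) {
		sum->msgs_alloc += percpt[i].msgs_alloc;
		sum->send_count += percpt[i].send_count;
	}
}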
LNET_PID_USERFLAG)); the_lnet.ln_pid = requested_pid; INIT_LIST_HEAD(&the_lnet.ln_test_peers); @@ -539,9 +550,11 @@ lnet_prepare(lnet_pid_t requested_pid) INIT_LIST_HEAD(&the_lnet.ln_nis_cpt); INIT_LIST_HEAD(&the_lnet.ln_nis_zombie); INIT_LIST_HEAD(&the_lnet.ln_routers); + INIT_LIST_HEAD(&the_lnet.ln_drop_rules); + INIT_LIST_HEAD(&the_lnet.ln_delay_rules); rc = lnet_create_remote_nets_table(); - if (rc != 0) + if (rc) goto failed; /* * NB the interface cookie in wire handles guards against delayed @@ -551,27 +564,27 @@ lnet_prepare(lnet_pid_t requested_pid) the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(), sizeof(lnet_counters_t)); - if (the_lnet.ln_counters == NULL) { + if (!the_lnet.ln_counters) { CERROR("Failed to allocate counters for LNet\n"); rc = -ENOMEM; goto failed; } rc = lnet_peer_tables_create(); - if (rc != 0) + if (rc) goto failed; rc = lnet_msg_containers_create(); - if (rc != 0) + if (rc) goto failed; rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0, LNET_COOKIE_TYPE_EQ); - if (rc != 0) + if (rc) goto failed; recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME); - if (recs == NULL) { + if (!recs) { rc = -ENOMEM; goto failed; } @@ -579,7 +592,7 @@ lnet_prepare(lnet_pid_t requested_pid) the_lnet.ln_me_containers = recs; recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD); - if (recs == NULL) { + if (!recs) { rc = -ENOMEM; goto failed; } @@ -587,7 +600,7 @@ lnet_prepare(lnet_pid_t requested_pid) the_lnet.ln_md_containers = recs; rc = lnet_portals_create(); - if (rc != 0) { + if (rc) { CERROR("Failed to create portals for LNet: %d\n", rc); goto failed; } @@ -599,17 +612,18 @@ lnet_prepare(lnet_pid_t requested_pid) return rc; } -int +static int lnet_unprepare(void) { - /* NB no LNET_LOCK since this is the last reference. All LND instances + /* + * NB no LNET_LOCK since this is the last reference. 
All LND instances * have shut down already, so it is safe to unlink and free all * descriptors, even those that appear committed to a network op (eg MD - * with non-zero pending count) */ - + * with non-zero pending count) + */ lnet_fail_nid(LNET_NID_ANY, 0); - LASSERT(the_lnet.ln_refcount == 0); + LASSERT(!the_lnet.ln_refcount); LASSERT(list_empty(&the_lnet.ln_test_peers)); LASSERT(list_empty(&the_lnet.ln_nis)); LASSERT(list_empty(&the_lnet.ln_nis_cpt)); @@ -617,12 +631,12 @@ lnet_unprepare(void) lnet_portals_destroy(); - if (the_lnet.ln_md_containers != NULL) { + if (the_lnet.ln_md_containers) { lnet_res_containers_destroy(the_lnet.ln_md_containers); the_lnet.ln_md_containers = NULL; } - if (the_lnet.ln_me_containers != NULL) { + if (the_lnet.ln_me_containers) { lnet_res_containers_destroy(the_lnet.ln_me_containers); the_lnet.ln_me_containers = NULL; } @@ -631,9 +645,9 @@ lnet_unprepare(void) lnet_msg_containers_destroy(); lnet_peer_tables_destroy(); - lnet_rtrpools_free(); + lnet_rtrpools_free(0); - if (the_lnet.ln_counters != NULL) { + if (the_lnet.ln_counters) { cfs_percpt_free(the_lnet.ln_counters); the_lnet.ln_counters = NULL; } @@ -709,7 +723,7 @@ lnet_cpt_of_nid_locked(lnet_nid_t nid) if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid)) continue; - LASSERT(ni->ni_cpts != NULL); + LASSERT(ni->ni_cpts); return ni->ni_cpts[lnet_nid_cpt_hash (nid, ni->ni_ncpts)]; } @@ -747,12 +761,12 @@ lnet_islocalnet(__u32 net) cpt = lnet_net_lock_current(); ni = lnet_net2ni_locked(net, cpt); - if (ni != NULL) + if (ni) lnet_ni_decref_locked(ni, cpt); lnet_net_unlock(cpt); - return ni != NULL; + return !!ni; } lnet_ni_t * @@ -783,11 +797,11 @@ lnet_islocalnid(lnet_nid_t nid) cpt = lnet_net_lock_current(); ni = lnet_nid2ni_locked(nid, cpt); - if (ni != NULL) + if (ni) lnet_ni_decref_locked(ni, cpt); lnet_net_unlock(cpt); - return ni != NULL; + return !!ni; } int @@ -803,7 +817,7 @@ lnet_count_acceptor_nis(void) list_for_each(tmp, &the_lnet.ln_nis) { ni = list_entry(tmp, lnet_ni_t, ni_list); - if (ni->ni_lnd->lnd_accept != NULL) + if (ni->ni_lnd->lnd_accept) count++; } @@ -812,90 +826,280 @@ lnet_count_acceptor_nis(void) return count; } -static int -lnet_ni_tq_credits(lnet_ni_t *ni) +static lnet_ping_info_t * +lnet_ping_info_create(int num_ni) { - int credits; + lnet_ping_info_t *ping_info; + unsigned int infosz; - LASSERT(ni->ni_ncpts >= 1); + infosz = offsetof(lnet_ping_info_t, pi_ni[num_ni]); + LIBCFS_ALLOC(ping_info, infosz); + if (!ping_info) { + CERROR("Can't allocate ping info[%d]\n", num_ni); + return NULL; + } - if (ni->ni_ncpts == 1) - return ni->ni_maxtxcredits; + ping_info->pi_nnis = num_ni; + ping_info->pi_pid = the_lnet.ln_pid; + ping_info->pi_magic = LNET_PROTO_PING_MAGIC; + ping_info->pi_features = LNET_PING_FEAT_NI_STATUS; - credits = ni->ni_maxtxcredits / ni->ni_ncpts; - credits = max(credits, 8 * ni->ni_peertxcredits); - credits = min(credits, ni->ni_maxtxcredits); + return ping_info; +} - return credits; +static inline int +lnet_get_ni_count(void) +{ + struct lnet_ni *ni; + int count = 0; + + lnet_net_lock(0); + + list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) + count++; + + lnet_net_unlock(0); + + return count; +} + +static inline void +lnet_ping_info_free(lnet_ping_info_t *pinfo) +{ + LIBCFS_FREE(pinfo, + offsetof(lnet_ping_info_t, + pi_ni[pinfo->pi_nnis])); } static void -lnet_shutdown_lndnis(void) +lnet_ping_info_destroy(void) { - int i; - int islo; - lnet_ni_t *ni; + struct lnet_ni *ni; - /* NB called holding the global mutex */ + lnet_net_lock(LNET_LOCK_EX); - /* All quiet on 
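lnet_ping_info_create() below sizes its allocation with offsetof(lnet_ping_info_t, pi_ni[num_ni]) because the ping info ends in a flexible array of per-NI status slots; sizeof() alone would miss the array. A self-contained illustration of the idiom (the struct layout is a stand-in):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct ping_info {                      /* stand-in layout */
	unsigned int magic, nnis;
	struct { unsigned long long nid; unsigned int status; } ni[];
};

int main(void)
{
	int n = 3;
	size_t infosz = offsetof(struct ping_info, ni[n]); /* hdr + n slots */
	struct ping_info *pi = calloc(1, infosz);

	if (!pi)
		return 1;
	pi->nnis = n;
	printf("%zu bytes for %d NI slots\n", infosz, n);
	free(pi);
	return 0;
}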
the API front */ - LASSERT(!the_lnet.ln_shutdown); - LASSERT(the_lnet.ln_refcount == 0); - LASSERT(list_empty(&the_lnet.ln_nis_zombie)); + list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) { + lnet_ni_lock(ni); + ni->ni_status = NULL; + lnet_ni_unlock(ni); + } - lnet_net_lock(LNET_LOCK_EX); - the_lnet.ln_shutdown = 1; /* flag shutdown */ + lnet_ping_info_free(the_lnet.ln_ping_info); + the_lnet.ln_ping_info = NULL; - /* Unlink NIs from the global table */ - while (!list_empty(&the_lnet.ln_nis)) { - ni = list_entry(the_lnet.ln_nis.next, - lnet_ni_t, ni_list); - /* move it to zombie list and nobody can find it anymore */ - list_move(&ni->ni_list, &the_lnet.ln_nis_zombie); - lnet_ni_decref_locked(ni, 0); /* drop ln_nis' ref */ - - if (!list_empty(&ni->ni_cptlist)) { - list_del_init(&ni->ni_cptlist); - lnet_ni_decref_locked(ni, 0); + lnet_net_unlock(LNET_LOCK_EX); +} + +static void +lnet_ping_event_handler(lnet_event_t *event) +{ + lnet_ping_info_t *pinfo = event->md.user_ptr; + + if (event->unlinked) + pinfo->pi_features = LNET_PING_FEAT_INVAL; +} + +static int +lnet_ping_info_setup(lnet_ping_info_t **ppinfo, lnet_handle_md_t *md_handle, + int ni_count, bool set_eq) +{ + lnet_process_id_t id = {LNET_NID_ANY, LNET_PID_ANY}; + lnet_handle_me_t me_handle; + lnet_md_t md = { NULL }; + int rc, rc2; + + if (set_eq) { + rc = LNetEQAlloc(0, lnet_ping_event_handler, + &the_lnet.ln_ping_target_eq); + if (rc) { + CERROR("Can't allocate ping EQ: %d\n", rc); + return rc; } } - /* Drop the cached eqwait NI. */ - if (the_lnet.ln_eq_waitni != NULL) { - lnet_ni_decref_locked(the_lnet.ln_eq_waitni, 0); - the_lnet.ln_eq_waitni = NULL; + *ppinfo = lnet_ping_info_create(ni_count); + if (!*ppinfo) { + rc = -ENOMEM; + goto failed_0; } - /* Drop the cached loopback NI. */ - if (the_lnet.ln_loni != NULL) { - lnet_ni_decref_locked(the_lnet.ln_loni, 0); - the_lnet.ln_loni = NULL; + rc = LNetMEAttach(LNET_RESERVED_PORTAL, id, + LNET_PROTO_PING_MATCHBITS, 0, + LNET_UNLINK, LNET_INS_AFTER, + &me_handle); + if (rc) { + CERROR("Can't create ping ME: %d\n", rc); + goto failed_1; } - lnet_net_unlock(LNET_LOCK_EX); + /* initialize md content */ + md.start = *ppinfo; + md.length = offsetof(lnet_ping_info_t, + pi_ni[(*ppinfo)->pi_nnis]); + md.threshold = LNET_MD_THRESH_INF; + md.max_size = 0; + md.options = LNET_MD_OP_GET | LNET_MD_TRUNCATE | + LNET_MD_MANAGE_REMOTE; + md.user_ptr = NULL; + md.eq_handle = the_lnet.ln_ping_target_eq; + md.user_ptr = *ppinfo; - /* Clear lazy portals and drop delayed messages which hold refs - * on their lnet_msg_t::msg_rxpeer */ - for (i = 0; i < the_lnet.ln_nportals; i++) - LNetClearLazyPortal(i); + rc = LNetMDAttach(me_handle, md, LNET_RETAIN, md_handle); + if (rc) { + CERROR("Can't attach ping MD: %d\n", rc); + goto failed_2; + } + + return 0; + +failed_2: + rc2 = LNetMEUnlink(me_handle); + LASSERT(!rc2); +failed_1: + lnet_ping_info_free(*ppinfo); + *ppinfo = NULL; +failed_0: + if (set_eq) + LNetEQFree(the_lnet.ln_ping_target_eq); + return rc; +} + +static void +lnet_ping_md_unlink(lnet_ping_info_t *pinfo, lnet_handle_md_t *md_handle) +{ + sigset_t blocked = cfs_block_allsigs(); + + LNetMDUnlink(*md_handle); + LNetInvalidateHandle(md_handle); + + /* NB md could be busy; this just starts the unlink */ + while (pinfo->pi_features != LNET_PING_FEAT_INVAL) { + CDEBUG(D_NET, "Still waiting for ping MD to unlink\n"); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(cfs_time_seconds(1)); + } + + cfs_restore_sigs(blocked); +} - /* Clear the peer table and wait for all peers to go (they 
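lnet_ping_target_update() above shows the publish-then-reap pattern: the new ping info is installed while holding LNET_LOCK_EX, the old one is merely remembered, and its unlink/free happens only after the lock is dropped, since waiting for LNetMDUnlink() completion can take a while. A pthread model of the same shape (names are mine):

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static void *current_info;

static void target_update(void *new_info)
{
	void *old;

	pthread_mutex_lock(&lock);
	old = current_info;       /* remember the outgoing object */
	current_info = new_info;  /* readers now see the new one */
	pthread_mutex_unlock(&lock);

	free(old);                /* slow teardown happens lock-free */
}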
hold refs on - * their NIs) */ - lnet_peer_tables_cleanup(); +static void +lnet_ping_info_install_locked(lnet_ping_info_t *ping_info) +{ + lnet_ni_status_t *ns; + lnet_ni_t *ni; + int i = 0; + + list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) { + LASSERT(i < ping_info->pi_nnis); + ns = &ping_info->pi_ni[i]; + + ns->ns_nid = ni->ni_nid; + + lnet_ni_lock(ni); + ns->ns_status = (ni->ni_status) ? + ni->ni_status->ns_status : LNET_NI_STATUS_UP; + ni->ni_status = ns; + lnet_ni_unlock(ni); + + i++; + } +} + +static void +lnet_ping_target_update(lnet_ping_info_t *pinfo, lnet_handle_md_t md_handle) +{ + lnet_ping_info_t *old_pinfo = NULL; + lnet_handle_md_t old_md; + + /* switch the NIs to point to the new ping info created */ lnet_net_lock(LNET_LOCK_EX); - /* Now wait for the NI's I just nuked to show up on ln_zombie_nis - * and shut them down in guaranteed thread context */ + + if (!the_lnet.ln_routing) + pinfo->pi_features |= LNET_PING_FEAT_RTE_DISABLED; + lnet_ping_info_install_locked(pinfo); + + if (the_lnet.ln_ping_info) { + old_pinfo = the_lnet.ln_ping_info; + old_md = the_lnet.ln_ping_target_md; + } + the_lnet.ln_ping_target_md = md_handle; + the_lnet.ln_ping_info = pinfo; + + lnet_net_unlock(LNET_LOCK_EX); + + if (old_pinfo) { + /* unlink the old ping info */ + lnet_ping_md_unlink(old_pinfo, &old_md); + lnet_ping_info_free(old_pinfo); + } +} + +static void +lnet_ping_target_fini(void) +{ + int rc; + + lnet_ping_md_unlink(the_lnet.ln_ping_info, + &the_lnet.ln_ping_target_md); + + rc = LNetEQFree(the_lnet.ln_ping_target_eq); + LASSERT(!rc); + + lnet_ping_info_destroy(); +} + +static int +lnet_ni_tq_credits(lnet_ni_t *ni) +{ + int credits; + + LASSERT(ni->ni_ncpts >= 1); + + if (ni->ni_ncpts == 1) + return ni->ni_maxtxcredits; + + credits = ni->ni_maxtxcredits / ni->ni_ncpts; + credits = max(credits, 8 * ni->ni_peertxcredits); + credits = min(credits, ni->ni_maxtxcredits); + + return credits; +} + +static void +lnet_ni_unlink_locked(lnet_ni_t *ni) +{ + if (!list_empty(&ni->ni_cptlist)) { + list_del_init(&ni->ni_cptlist); + lnet_ni_decref_locked(ni, 0); + } + + /* move it to zombie list and nobody can find it anymore */ + LASSERT(!list_empty(&ni->ni_list)); + list_move(&ni->ni_list, &the_lnet.ln_nis_zombie); + lnet_ni_decref_locked(ni, 0); /* drop ln_nis' ref */ +} + +static void +lnet_clear_zombies_nis_locked(void) +{ + int i; + int islo; + lnet_ni_t *ni; + lnet_ni_t *temp; + + /* + * Now wait for the NI's I just nuked to show up on ln_zombie_nis + * and shut them down in guaranteed thread context + */ i = 2; - while (!list_empty(&the_lnet.ln_nis_zombie)) { + list_for_each_entry_safe(ni, temp, &the_lnet.ln_nis_zombie, ni_list) { int *ref; int j; - ni = list_entry(the_lnet.ln_nis_zombie.next, - lnet_ni_t, ni_list); list_del_init(&ni->ni_list); cfs_percpt_for_each(ref, j, ni->ni_refs) { - if (*ref == 0) + if (!*ref) continue; /* still busy, add it back to zombie list */ list_add(&ni->ni_list, &the_lnet.ln_nis_zombie); @@ -921,11 +1125,12 @@ lnet_shutdown_lndnis(void) islo = ni->ni_lnd->lnd_type == LOLND; LASSERT(!in_interrupt()); - (ni->ni_lnd->lnd_shutdown)(ni); - - /* can't deref lnd anymore now; it might have unregistered - * itself... */ + ni->ni_lnd->lnd_shutdown(ni); + /* + * can't deref lnd anymore now; it might have unregistered + * itself... 
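The zombie-NI loop above waits for every per-CPT reference to drain, sleeping briefly between passes and using is_power_of_2(++i) so the "still waiting" message is logged with exponential backoff rather than once per pass. A rough user-space skeleton of that wait (illustrative only; the refs here are assumed to be dropped by other threads while we sleep):

#include <stdio.h>
#include <unistd.h>

static void drain_refs(const volatile int *refs, int n)
{
	int tries = 0;
	int busy;

	do {
		int i;

		busy = 0;
		for (i = 0; i < n; i++)         /* any partition still busy? */
			busy |= refs[i];
		if (busy) {
			tries++;
			if (!(tries & (tries - 1))) /* log at pass 1, 2, 4, 8 ... */
				fprintf(stderr, "still draining, pass %d\n", tries);
			usleep(100 * 1000);     /* analogue of schedule_timeout */
		}
	} while (busy);
}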
+ */ if (!islo) CDEBUG(D_LNI, "Removed LNI %s\n", libcfs_nid2str(ni->ni_nid)); @@ -935,176 +1140,263 @@ lnet_shutdown_lndnis(void) lnet_net_lock(LNET_LOCK_EX); } +} + +static void +lnet_shutdown_lndnis(void) +{ + lnet_ni_t *ni; + lnet_ni_t *temp; + int i; + + /* NB called holding the global mutex */ + + /* All quiet on the API front */ + LASSERT(!the_lnet.ln_shutdown); + LASSERT(!the_lnet.ln_refcount); + LASSERT(list_empty(&the_lnet.ln_nis_zombie)); + + lnet_net_lock(LNET_LOCK_EX); + the_lnet.ln_shutdown = 1; /* flag shutdown */ + + /* Unlink NIs from the global table */ + list_for_each_entry_safe(ni, temp, &the_lnet.ln_nis, ni_list) { + lnet_ni_unlink_locked(ni); + } + /* Drop the cached loopback NI. */ + if (the_lnet.ln_loni) { + lnet_ni_decref_locked(the_lnet.ln_loni, 0); + the_lnet.ln_loni = NULL; + } + + lnet_net_unlock(LNET_LOCK_EX); + + /* + * Clear lazy portals and drop delayed messages which hold refs + * on their lnet_msg_t::msg_rxpeer + */ + for (i = 0; i < the_lnet.ln_nportals; i++) + LNetClearLazyPortal(i); + + /* + * Clear the peer table and wait for all peers to go (they hold refs on + * their NIs) + */ + lnet_peer_tables_cleanup(NULL); + + lnet_net_lock(LNET_LOCK_EX); + + lnet_clear_zombies_nis_locked(); the_lnet.ln_shutdown = 0; lnet_net_unlock(LNET_LOCK_EX); +} - if (the_lnet.ln_network_tokens != NULL) { - LIBCFS_FREE(the_lnet.ln_network_tokens, - the_lnet.ln_network_tokens_nob); - the_lnet.ln_network_tokens = NULL; - } +/* shutdown down the NI and release refcount */ +static void +lnet_shutdown_lndni(struct lnet_ni *ni) +{ + int i; + + lnet_net_lock(LNET_LOCK_EX); + lnet_ni_unlink_locked(ni); + lnet_net_unlock(LNET_LOCK_EX); + + /* clear messages for this NI on the lazy portal */ + for (i = 0; i < the_lnet.ln_nportals; i++) + lnet_clear_lazy_portal(ni, i, "Shutting down NI"); + + /* Do peer table cleanup for this ni */ + lnet_peer_tables_cleanup(ni); + + lnet_net_lock(LNET_LOCK_EX); + lnet_clear_zombies_nis_locked(); + lnet_net_unlock(LNET_LOCK_EX); } static int -lnet_startup_lndnis(void) +lnet_startup_lndni(struct lnet_ni *ni, __s32 peer_timeout, + __s32 peer_cr, __s32 peer_buf_cr, __s32 credits) { + int rc = -EINVAL; + int lnd_type; lnd_t *lnd; - struct lnet_ni *ni; struct lnet_tx_queue *tq; - struct list_head nilist; int i; - int rc = 0; - __u32 lnd_type; - int nicount = 0; - char *nets = lnet_get_networks(); - INIT_LIST_HEAD(&nilist); + lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid)); - if (nets == NULL) - goto failed; + LASSERT(libcfs_isknown_lnd(lnd_type)); - rc = lnet_parse_networks(&nilist, nets); - if (rc != 0) - goto failed; + if (lnd_type == CIBLND || lnd_type == OPENIBLND || + lnd_type == IIBLND || lnd_type == VIBLND) { + CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type)); + goto failed0; + } - while (!list_empty(&nilist)) { - ni = list_entry(nilist.next, lnet_ni_t, ni_list); - lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid)); + /* Make sure this new NI is unique. 
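lnet_startup_lndni() below resolves its LND with the standard autoload dance: look the type up, and if it is absent, drop the mutex (request_module() can sleep, and the loading module will want to register under that same mutex), modprobe the LND, retake the mutex and look again. Condensed kernel-style sketch (the helper shape is mine):

#include <linux/kmod.h>
#include <linux/mutex.h>

static void *find_or_load(struct mutex *mtx, void *(*find)(int),
			  int type, const char *modname)
{
	void *lnd = find(type);

	if (!lnd) {
		mutex_unlock(mtx);             /* request_module() may sleep */
		request_module("%s", modname); /* modprobe the LND */
		mutex_lock(mtx);
		lnd = find(type);              /* did it register on load? */
	}
	return lnd;                            /* NULL: give up with an error */
}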
*/ + lnet_net_lock(LNET_LOCK_EX); + rc = lnet_net_unique(LNET_NIDNET(ni->ni_nid), &the_lnet.ln_nis); + lnet_net_unlock(LNET_LOCK_EX); + if (!rc) { + if (lnd_type == LOLND) { + lnet_ni_free(ni); + return 0; + } - LASSERT(libcfs_isknown_lnd(lnd_type)); + CERROR("Net %s is not unique\n", + libcfs_net2str(LNET_NIDNET(ni->ni_nid))); + rc = -EEXIST; + goto failed0; + } - if (lnd_type == CIBLND || - lnd_type == OPENIBLND || - lnd_type == IIBLND || - lnd_type == VIBLND) { - CERROR("LND %s obsoleted\n", - libcfs_lnd2str(lnd_type)); - goto failed; - } + mutex_lock(&the_lnet.ln_lnd_mutex); + lnd = lnet_find_lnd_by_type(lnd_type); + if (!lnd) { + mutex_unlock(&the_lnet.ln_lnd_mutex); + rc = request_module("%s", libcfs_lnd2modname(lnd_type)); mutex_lock(&the_lnet.ln_lnd_mutex); - lnd = lnet_find_lnd_by_type(lnd_type); - if (lnd == NULL) { + lnd = lnet_find_lnd_by_type(lnd_type); + if (!lnd) { mutex_unlock(&the_lnet.ln_lnd_mutex); - rc = request_module("%s", - libcfs_lnd2modname(lnd_type)); - mutex_lock(&the_lnet.ln_lnd_mutex); - - lnd = lnet_find_lnd_by_type(lnd_type); - if (lnd == NULL) { - mutex_unlock(&the_lnet.ln_lnd_mutex); - CERROR("Can't load LND %s, module %s, rc=%d\n", - libcfs_lnd2str(lnd_type), - libcfs_lnd2modname(lnd_type), rc); - goto failed; - } + CERROR("Can't load LND %s, module %s, rc=%d\n", + libcfs_lnd2str(lnd_type), + libcfs_lnd2modname(lnd_type), rc); + rc = -EINVAL; + goto failed0; } + } - lnet_net_lock(LNET_LOCK_EX); - lnd->lnd_refcount++; - lnet_net_unlock(LNET_LOCK_EX); - - ni->ni_lnd = lnd; - - rc = (lnd->lnd_startup)(ni); - - mutex_unlock(&the_lnet.ln_lnd_mutex); + lnet_net_lock(LNET_LOCK_EX); + lnd->lnd_refcount++; + lnet_net_unlock(LNET_LOCK_EX); - if (rc != 0) { - LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n", - rc, libcfs_lnd2str(lnd->lnd_type)); - lnet_net_lock(LNET_LOCK_EX); - lnd->lnd_refcount--; - lnet_net_unlock(LNET_LOCK_EX); - goto failed; - } + ni->ni_lnd = lnd; - LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query != NULL); + rc = lnd->lnd_startup(ni); - list_del(&ni->ni_list); + mutex_unlock(&the_lnet.ln_lnd_mutex); + if (rc) { + LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n", + rc, libcfs_lnd2str(lnd->lnd_type)); lnet_net_lock(LNET_LOCK_EX); - /* refcount for ln_nis */ - lnet_ni_addref_locked(ni, 0); - list_add_tail(&ni->ni_list, &the_lnet.ln_nis); - if (ni->ni_cpts != NULL) { - list_add_tail(&ni->ni_cptlist, - &the_lnet.ln_nis_cpt); - lnet_ni_addref_locked(ni, 0); - } - + lnd->lnd_refcount--; lnet_net_unlock(LNET_LOCK_EX); + goto failed0; + } - if (lnd->lnd_type == LOLND) { - lnet_ni_addref(ni); - LASSERT(the_lnet.ln_loni == NULL); - the_lnet.ln_loni = ni; - continue; - } + /* + * If given some LND tunable parameters, parse those now to + * override the values in the NI structure. + */ + if (peer_buf_cr >= 0) + ni->ni_peerrtrcredits = peer_buf_cr; + if (peer_timeout >= 0) + ni->ni_peertimeout = peer_timeout; + /* + * TODO + * Note: For now, don't allow the user to change + * peertxcredits as this number is used in the + * IB LND to control queue depth. + * if (peer_cr != -1) + * ni->ni_peertxcredits = peer_cr; + */ + if (credits >= 0) + ni->ni_maxtxcredits = credits; - if (ni->ni_peertxcredits == 0 || - ni->ni_maxtxcredits == 0) { - LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n", - libcfs_lnd2str(lnd->lnd_type), - ni->ni_peertxcredits == 0 ? 
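lnet_ni_tq_credits() splits ni_maxtxcredits across the NI's CPTs but clamps the per-queue share between 8x the per-peer credits and the overall maximum. A plain-C restatement with a worked example:

#include <stdio.h>

static int tq_credits(int maxtx, int peertx, int ncpts)
{
	int credits;

	if (ncpts == 1)
		return maxtx;           /* a single queue gets everything */

	credits = maxtx / ncpts;
	if (credits < 8 * peertx)       /* max(credits, 8 * peertx) */
		credits = 8 * peertx;
	if (credits > maxtx)            /* min(credits, maxtx) */
		credits = maxtx;
	return credits;
}

int main(void)
{
	/* 256 tx credits, 8 per peer, 4 CPTs: max(64, 64) = 64 per queue */
	printf("%d\n", tq_credits(256, 8, 4));
	return 0;
}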
- "" : "per-peer "); - goto failed; - } + LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query); - cfs_percpt_for_each(tq, i, ni->ni_tx_queues) { - tq->tq_credits_min = - tq->tq_credits_max = - tq->tq_credits = lnet_ni_tq_credits(ni); - } + lnet_net_lock(LNET_LOCK_EX); + /* refcount for ln_nis */ + lnet_ni_addref_locked(ni, 0); + list_add_tail(&ni->ni_list, &the_lnet.ln_nis); + if (ni->ni_cpts) { + lnet_ni_addref_locked(ni, 0); + list_add_tail(&ni->ni_cptlist, &the_lnet.ln_nis_cpt); + } + + lnet_net_unlock(LNET_LOCK_EX); - CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n", - libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits, - lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER, - ni->ni_peerrtrcredits, ni->ni_peertimeout); + if (lnd->lnd_type == LOLND) { + lnet_ni_addref(ni); + LASSERT(!the_lnet.ln_loni); + the_lnet.ln_loni = ni; + return 0; + } - nicount++; + if (!ni->ni_peertxcredits || !ni->ni_maxtxcredits) { + LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n", + libcfs_lnd2str(lnd->lnd_type), + !ni->ni_peertxcredits ? + "" : "per-peer "); + /* + * shutdown the NI since if we get here then it must've already + * been started + */ + lnet_shutdown_lndni(ni); + return -EINVAL; } - if (the_lnet.ln_eq_waitni != NULL && nicount > 1) { - lnd_type = the_lnet.ln_eq_waitni->ni_lnd->lnd_type; - LCONSOLE_ERROR_MSG(0x109, "LND %s can only run single-network\n", - libcfs_lnd2str(lnd_type)); - goto failed; + cfs_percpt_for_each(tq, i, ni->ni_tx_queues) { + tq->tq_credits_min = + tq->tq_credits_max = + tq->tq_credits = lnet_ni_tq_credits(ni); } + CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n", + libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits, + lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER, + ni->ni_peerrtrcredits, ni->ni_peertimeout); + return 0; +failed0: + lnet_ni_free(ni); + return rc; +} - failed: - lnet_shutdown_lndnis(); +static int +lnet_startup_lndnis(struct list_head *nilist) +{ + struct lnet_ni *ni; + int rc; + int ni_count = 0; - while (!list_empty(&nilist)) { - ni = list_entry(nilist.next, lnet_ni_t, ni_list); + while (!list_empty(nilist)) { + ni = list_entry(nilist->next, lnet_ni_t, ni_list); list_del(&ni->ni_list); - lnet_ni_free(ni); + rc = lnet_startup_lndni(ni, -1, -1, -1, -1); + + if (rc < 0) + goto failed; + + ni_count++; } - return -ENETDOWN; + return ni_count; +failed: + lnet_shutdown_lndnis(); + + return rc; } /** * Initialize LNet library. * - * Only userspace program needs to call this function - it's automatically - * called in the kernel at module loading time. Caller has to call lnet_fini() - * after a call to lnet_init(), if and only if the latter returned 0. It must - * be called exactly once. + * Automatically called at module loading time. Caller has to call + * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the + * latter returned 0. It must be called exactly once. * - * \return 0 on success, and -ve on failures. + * \retval 0 on success + * \retval -ve on failures. 
*/ -int -lnet_init(void) +int lnet_lib_init(void) { int rc; lnet_assert_wire_constants(); - LASSERT(!the_lnet.ln_init); memset(&the_lnet, 0, sizeof(the_lnet)); @@ -1117,28 +1409,29 @@ lnet_init(void) /* we are under risk of consuming all lh_cookie */ CERROR("Can't have %d CPTs for LNet (max allowed is %d), please change setting of CPT-table and retry\n", the_lnet.ln_cpt_number, LNET_CPT_MAX); - return -1; + return -E2BIG; } while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number) the_lnet.ln_cpt_bits++; rc = lnet_create_locks(); - if (rc != 0) { + if (rc) { CERROR("Can't create LNet global locks: %d\n", rc); - return -1; + return rc; } the_lnet.ln_refcount = 0; - the_lnet.ln_init = 1; LNetInvalidateHandle(&the_lnet.ln_rc_eqh); INIT_LIST_HEAD(&the_lnet.ln_lnds); INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie); INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow); - /* The hash table size is the number of bits it takes to express the set + /* + * The hash table size is the number of bits it takes to express the set * ln_num_routes, minus 1 (better to under estimate than over so we - * don't waste memory). */ + * don't waste memory). + */ if (rnet_htable_size <= 0) rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT; else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX) @@ -1146,9 +1439,11 @@ lnet_init(void) the_lnet.ln_remote_nets_hbits = max_t(int, 1, order_base_2(rnet_htable_size) - 1); - /* All LNDs apart from the LOLND are in separate modules. They + /* + * All LNDs apart from the LOLND are in separate modules. They * register themselves when their module loads, and unregister - * themselves when their module is unloaded. */ + * themselves when their module is unloaded. + */ lnet_register_lnd(&the_lolnd); return 0; } @@ -1156,30 +1451,22 @@ lnet_init(void) /** * Finalize LNet library. * - * Only userspace program needs to call this function. It can be called - * at most once. - * - * \pre lnet_init() called with success. + * \pre lnet_lib_init() called with success. * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls. */ -void -lnet_fini(void) +void lnet_lib_exit(void) { - LASSERT(the_lnet.ln_init); - LASSERT(the_lnet.ln_refcount == 0); + LASSERT(!the_lnet.ln_refcount); while (!list_empty(&the_lnet.ln_lnds)) lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next, - lnd_t, lnd_list)); + lnd_t, lnd_list)); lnet_destroy_locks(); - - the_lnet.ln_init = 0; } /** * Set LNet PID and start LNet interfaces, routing, and forwarding. * - * Userspace program should call this after a successful call to lnet_init(). * Users must call this function at least once before any other functions. * For each successful call there must be a corresponding call to * LNetNIFini(). 
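lnet_lib_init() reserves ceil(log2(ncpt)) bits of each object cookie for the CPT number, which is exactly what the small while loop below computes (and why an over-large CPT count now fails with -E2BIG instead of a bare -1). A standalone check:

#include <stdio.h>

static int cpt_bits(int ncpt)
{
	int bits = 0;

	while ((1 << bits) < ncpt)   /* smallest bits with 2^bits >= ncpt */
		bits++;
	return bits;
}

int main(void)
{
	printf("%d %d %d\n", cpt_bits(1), cpt_bits(4), cpt_bits(5)); /* 0 2 3 */
	return 0;
}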
For subsequent calls to LNetNIInit(), \a requested_pid is @@ -1197,77 +1484,114 @@ LNetNIInit(lnet_pid_t requested_pid) { int im_a_router = 0; int rc; + int ni_count; + lnet_ping_info_t *pinfo; + lnet_handle_md_t md_handle; + struct list_head net_head; + + INIT_LIST_HEAD(&net_head); mutex_lock(&the_lnet.ln_api_mutex); - LASSERT(the_lnet.ln_init); CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount); if (the_lnet.ln_refcount > 0) { rc = the_lnet.ln_refcount++; - goto out; + mutex_unlock(&the_lnet.ln_api_mutex); + return rc; } - if (requested_pid == LNET_PID_ANY) { - /* Don't instantiate LNET just for me */ - rc = -ENETDOWN; - goto failed0; + rc = lnet_prepare(requested_pid); + if (rc) { + mutex_unlock(&the_lnet.ln_api_mutex); + return rc; } - rc = lnet_prepare(requested_pid); - if (rc != 0) - goto failed0; + /* Add in the loopback network */ + if (!lnet_ni_alloc(LNET_MKNET(LOLND, 0), NULL, &net_head)) { + rc = -ENOMEM; + goto err_empty_list; + } - rc = lnet_startup_lndnis(); - if (rc != 0) - goto failed1; + /* + * If LNet is being initialized via DLC it is possible + * that the user requests not to load module parameters (ones which + * are supported by DLC) on initialization. Therefore, make sure not + * to load networks, routes and forwarding from module parameters + * in this case. On cleanup in case of failure only clean up + * routes if it has been loaded + */ + if (!the_lnet.ln_nis_from_mod_params) { + rc = lnet_parse_networks(&net_head, lnet_get_networks()); + if (rc < 0) + goto err_empty_list; + } + + ni_count = lnet_startup_lndnis(&net_head); + if (ni_count < 0) { + rc = ni_count; + goto err_empty_list; + } - rc = lnet_parse_routes(lnet_get_routes(), &im_a_router); - if (rc != 0) - goto failed2; + if (!the_lnet.ln_nis_from_mod_params) { + rc = lnet_parse_routes(lnet_get_routes(), &im_a_router); + if (rc) + goto err_shutdown_lndnis; - rc = lnet_check_routes(); - if (rc != 0) - goto failed2; + rc = lnet_check_routes(); + if (rc) + goto err_destory_routes; - rc = lnet_rtrpools_alloc(im_a_router); - if (rc != 0) - goto failed2; + rc = lnet_rtrpools_alloc(im_a_router); + if (rc) + goto err_destory_routes; + } rc = lnet_acceptor_start(); - if (rc != 0) - goto failed2; + if (rc) + goto err_destory_routes; the_lnet.ln_refcount = 1; /* Now I may use my own API functions... 
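LNetNIInit() is reference counted: only the first caller runs the whole bring-up (prepare, NIs, routes, acceptor, ping target, router checker); later callers simply bump ln_refcount under the API mutex, and teardown happens when the last LNetNIFini() drops the count back to zero. A pthread model of the guard (names are mine):

#include <pthread.h>

static pthread_mutex_t api_mutex = PTHREAD_MUTEX_INITIALIZER;
static int refcount;

static int ni_init(int (*do_setup)(void))
{
	int rc = 0;

	pthread_mutex_lock(&api_mutex);
	if (refcount > 0) {
		refcount++;           /* already up: just take a reference */
	} else {
		rc = do_setup();      /* first user does the real bring-up */
		if (rc == 0)
			refcount = 1;
	}
	pthread_mutex_unlock(&api_mutex);
	return rc;
}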
*/ - /* NB router checker needs the_lnet.ln_ping_info in - * lnet_router_checker -> lnet_update_ni_status_locked */ - rc = lnet_ping_target_init(); - if (rc != 0) - goto failed3; + rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count, true); + if (rc) + goto err_acceptor_stop; + + lnet_ping_target_update(pinfo, md_handle); rc = lnet_router_checker_start(); - if (rc != 0) - goto failed4; + if (rc) + goto err_stop_ping; + lnet_fault_init(); lnet_router_debugfs_init(); - goto out; - failed4: + mutex_unlock(&the_lnet.ln_api_mutex); + + return 0; + +err_stop_ping: lnet_ping_target_fini(); - failed3: +err_acceptor_stop: the_lnet.ln_refcount = 0; lnet_acceptor_stop(); - failed2: - lnet_destroy_routes(); +err_destory_routes: + if (!the_lnet.ln_nis_from_mod_params) + lnet_destroy_routes(); +err_shutdown_lndnis: lnet_shutdown_lndnis(); - failed1: +err_empty_list: lnet_unprepare(); - failed0: LASSERT(rc < 0); - out: mutex_unlock(&the_lnet.ln_api_mutex); + while (!list_empty(&net_head)) { + struct lnet_ni *ni; + + ni = list_entry(net_head.next, struct lnet_ni, ni_list); + list_del_init(&ni->ni_list); + lnet_ni_free(ni); + } return rc; } EXPORT_SYMBOL(LNetNIInit); @@ -1286,7 +1610,6 @@ LNetNIFini(void) { mutex_lock(&the_lnet.ln_api_mutex); - LASSERT(the_lnet.ln_init); LASSERT(the_lnet.ln_refcount > 0); if (the_lnet.ln_refcount != 1) { @@ -1294,6 +1617,7 @@ LNetNIFini(void) } else { LASSERT(!the_lnet.ln_niinit_self); + lnet_fault_fini(); lnet_router_debugfs_fini(); lnet_router_checker_stop(); lnet_ping_target_fini(); @@ -1313,30 +1637,233 @@ LNetNIFini(void) EXPORT_SYMBOL(LNetNIFini); /** - * This is an ugly hack to export IOC_LIBCFS_DEBUG_PEER and - * IOC_LIBCFS_PORTALS_COMPATIBILITY commands to users, by tweaking the LNet - * internal ioctl handler. + * Grabs the ni data from the ni structure and fills the out + * parameters * - * IOC_LIBCFS_PORTALS_COMPATIBILITY is now deprecated, don't use it. - * - * \param cmd IOC_LIBCFS_DEBUG_PEER to print debugging data about a peer. - * The data will be printed to system console. Don't use it excessively. - * \param arg A pointer to lnet_process_id_t, process ID of the peer. 
+ * \param[in] ni network interface structure + * \param[out] cpt_count the number of cpts the ni is on + * \param[out] nid Network Interface ID + * \param[out] peer_timeout NI peer timeout + * \param[out] peer_tx_crdits NI peer transmit credits + * \param[out] peer_rtr_credits NI peer router credits + * \param[out] max_tx_credits NI max transmit credit + * \param[out] net_config Network configuration + */ +static void +lnet_fill_ni_info(struct lnet_ni *ni, __u32 *cpt_count, __u64 *nid, + int *peer_timeout, int *peer_tx_credits, + int *peer_rtr_credits, int *max_tx_credits, + struct lnet_ioctl_net_config *net_config) +{ + int i; + + if (!ni) + return; + + if (!net_config) + return; + + BUILD_BUG_ON(ARRAY_SIZE(ni->ni_interfaces) != + ARRAY_SIZE(net_config->ni_interfaces)); + + for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) { + if (!ni->ni_interfaces[i]) + break; + + strncpy(net_config->ni_interfaces[i], + ni->ni_interfaces[i], + sizeof(net_config->ni_interfaces[i])); + } + + *nid = ni->ni_nid; + *peer_timeout = ni->ni_peertimeout; + *peer_tx_credits = ni->ni_peertxcredits; + *peer_rtr_credits = ni->ni_peerrtrcredits; + *max_tx_credits = ni->ni_maxtxcredits; + + net_config->ni_status = ni->ni_status->ns_status; + + if (ni->ni_cpts) { + int num_cpts = min(ni->ni_ncpts, LNET_MAX_SHOW_NUM_CPT); + + for (i = 0; i < num_cpts; i++) + net_config->ni_cpts[i] = ni->ni_cpts[i]; + + *cpt_count = num_cpts; + } +} + +int +lnet_get_net_config(int idx, __u32 *cpt_count, __u64 *nid, int *peer_timeout, + int *peer_tx_credits, int *peer_rtr_credits, + int *max_tx_credits, + struct lnet_ioctl_net_config *net_config) +{ + struct lnet_ni *ni; + struct list_head *tmp; + int cpt, i = 0; + int rc = -ENOENT; + + cpt = lnet_net_lock_current(); + + list_for_each(tmp, &the_lnet.ln_nis) { + if (i++ != idx) + continue; + + ni = list_entry(tmp, lnet_ni_t, ni_list); + lnet_ni_lock(ni); + lnet_fill_ni_info(ni, cpt_count, nid, peer_timeout, + peer_tx_credits, peer_rtr_credits, + max_tx_credits, net_config); + lnet_ni_unlock(ni); + rc = 0; + break; + } + + lnet_net_unlock(cpt); + return rc; +} + +int +lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets, + __s32 peer_timeout, __s32 peer_cr, __s32 peer_buf_cr, + __s32 credits) +{ + lnet_ping_info_t *pinfo; + lnet_handle_md_t md_handle; + struct lnet_ni *ni; + struct list_head net_head; + lnet_remotenet_t *rnet; + int rc; + + INIT_LIST_HEAD(&net_head); + + /* Create a ni structure for the network string */ + rc = lnet_parse_networks(&net_head, nets); + if (rc <= 0) + return !rc ? 
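lnet_get_net_config() below uses a cursor-style lookup: walk ln_nis counting entries until the caller's index matches, fill in the out parameters under the NI lock, and report -ENOENT when the index runs off the end. This is how userland enumerates interfaces one ioctl at a time. An array model of the idiom (illustrative):

#include <errno.h>

static int get_by_index(const int *items, int count, int idx, int *out)
{
	int i;

	for (i = 0; i < count; i++) {
		if (i != idx)
			continue;
		*out = items[i];      /* found the idx'th entry */
		return 0;
	}
	return -ENOENT;               /* index past the end of the list */
}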
-EINVAL : rc; + + mutex_lock(&the_lnet.ln_api_mutex); + + if (rc > 1) { + rc = -EINVAL; /* only add one interface per call */ + goto failed0; + } + + ni = list_entry(net_head.next, struct lnet_ni, ni_list); + + lnet_net_lock(LNET_LOCK_EX); + rnet = lnet_find_net_locked(LNET_NIDNET(ni->ni_nid)); + lnet_net_unlock(LNET_LOCK_EX); + /* + * make sure that the net added doesn't invalidate the current + * configuration LNet is keeping + */ + if (rnet) { + CERROR("Adding net %s will invalidate routing configuration\n", + nets); + rc = -EUSERS; + goto failed0; + } + + rc = lnet_ping_info_setup(&pinfo, &md_handle, 1 + lnet_get_ni_count(), + false); + if (rc) + goto failed0; + + list_del_init(&ni->ni_list); + + rc = lnet_startup_lndni(ni, peer_timeout, peer_cr, + peer_buf_cr, credits); + if (rc) + goto failed1; + + if (ni->ni_lnd->lnd_accept) { + rc = lnet_acceptor_start(); + if (rc < 0) { + /* shutdown the ni that we just started */ + CERROR("Failed to start up acceptor thread\n"); + lnet_shutdown_lndni(ni); + goto failed1; + } + } + + lnet_ping_target_update(pinfo, md_handle); + mutex_unlock(&the_lnet.ln_api_mutex); + + return 0; + +failed1: + lnet_ping_md_unlink(pinfo, &md_handle); + lnet_ping_info_free(pinfo); +failed0: + mutex_unlock(&the_lnet.ln_api_mutex); + while (!list_empty(&net_head)) { + ni = list_entry(net_head.next, struct lnet_ni, ni_list); + list_del_init(&ni->ni_list); + lnet_ni_free(ni); + } + return rc; +} + +int +lnet_dyn_del_ni(__u32 net) +{ + lnet_ni_t *ni; + lnet_ping_info_t *pinfo; + lnet_handle_md_t md_handle; + int rc; + + /* don't allow userspace to shutdown the LOLND */ + if (LNET_NETTYP(net) == LOLND) + return -EINVAL; + + mutex_lock(&the_lnet.ln_api_mutex); + /* create and link a new ping info, before removing the old one */ + rc = lnet_ping_info_setup(&pinfo, &md_handle, + lnet_get_ni_count() - 1, false); + if (rc) + goto out; + + ni = lnet_net2ni(net); + if (!ni) { + rc = -EINVAL; + goto failed; + } + + /* decrement the reference counter taken by lnet_net2ni() */ + lnet_ni_decref_locked(ni, 0); + + lnet_shutdown_lndni(ni); + + if (!lnet_count_acceptor_nis()) + lnet_acceptor_stop(); + + lnet_ping_target_update(pinfo, md_handle); + goto out; +failed: + lnet_ping_md_unlink(pinfo, &md_handle); + lnet_ping_info_free(pinfo); +out: + mutex_unlock(&the_lnet.ln_api_mutex); + + return rc; +} + +/** + * LNet ioctl handler. * - * \return Always return 0 when called by users directly (i.e., not via ioctl). */ int LNetCtl(unsigned int cmd, void *arg) { struct libcfs_ioctl_data *data = arg; + struct lnet_ioctl_config_data *config; lnet_process_id_t id = {0}; lnet_ni_t *ni; int rc; unsigned long secs_passed; - LASSERT(the_lnet.ln_init); - LASSERT(the_lnet.ln_refcount > 0); - switch (cmd) { case IOC_LIBCFS_GET_NI: rc = LNetGetId(data->ioc_count, &id); @@ -1347,26 +1874,149 @@ LNetCtl(unsigned int cmd, void *arg) return lnet_fail_nid(data->ioc_nid, data->ioc_count); case IOC_LIBCFS_ADD_ROUTE: - rc = lnet_add_route(data->ioc_net, data->ioc_count, - data->ioc_nid, data->ioc_priority); - return (rc != 0) ? 
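Every new LNetCtl() case below begins by checking the caller-supplied ioc_len against the structure it is about to dereference, and cases with bulk payloads (IOC_LIBCFS_GET_NET, IOC_LIBCFS_GET_BUF) add the payload size on top before touching cfg_bulk. A minimal model of that check (the struct shapes are stand-ins):

#include <errno.h>
#include <stddef.h>

struct hdr { unsigned int ioc_len; };
struct cfg { struct hdr cfg_hdr; int cfg_net; char cfg_bulk[]; };

/* Reject short buffers before reading anything past the header. */
static int validate(const struct cfg *config, size_t bulk_size)
{
	if (config->cfg_hdr.ioc_len < sizeof(*config) + bulk_size)
		return -EINVAL;
	return 0;
}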
rc : lnet_check_routes(); + config = arg; + + if (config->cfg_hdr.ioc_len < sizeof(*config)) + return -EINVAL; + + mutex_lock(&the_lnet.ln_api_mutex); + rc = lnet_add_route(config->cfg_net, + config->cfg_config_u.cfg_route.rtr_hop, + config->cfg_nid, + config->cfg_config_u.cfg_route.rtr_priority); + if (!rc) { + rc = lnet_check_routes(); + if (rc) + lnet_del_route(config->cfg_net, + config->cfg_nid); + } + mutex_unlock(&the_lnet.ln_api_mutex); + return rc; case IOC_LIBCFS_DEL_ROUTE: - return lnet_del_route(data->ioc_net, data->ioc_nid); + config = arg; + + if (config->cfg_hdr.ioc_len < sizeof(*config)) + return -EINVAL; + + mutex_lock(&the_lnet.ln_api_mutex); + rc = lnet_del_route(config->cfg_net, config->cfg_nid); + mutex_unlock(&the_lnet.ln_api_mutex); + return rc; case IOC_LIBCFS_GET_ROUTE: - return lnet_get_route(data->ioc_count, - &data->ioc_net, &data->ioc_count, - &data->ioc_nid, &data->ioc_flags, - &data->ioc_priority); + config = arg; + + if (config->cfg_hdr.ioc_len < sizeof(*config)) + return -EINVAL; + + return lnet_get_route(config->cfg_count, + &config->cfg_net, + &config->cfg_config_u.cfg_route.rtr_hop, + &config->cfg_nid, + &config->cfg_config_u.cfg_route.rtr_flags, + &config->cfg_config_u.cfg_route.rtr_priority); + + case IOC_LIBCFS_GET_NET: { + struct lnet_ioctl_net_config *net_config; + size_t total = sizeof(*config) + sizeof(*net_config); + + config = arg; + + if (config->cfg_hdr.ioc_len < total) + return -EINVAL; + + net_config = (struct lnet_ioctl_net_config *) + config->cfg_bulk; + if (!net_config) + return -EINVAL; + + return lnet_get_net_config(config->cfg_count, + &config->cfg_ncpts, + &config->cfg_nid, + &config->cfg_config_u.cfg_net.net_peer_timeout, + &config->cfg_config_u.cfg_net.net_peer_tx_credits, + &config->cfg_config_u.cfg_net.net_peer_rtr_credits, + &config->cfg_config_u.cfg_net.net_max_tx_credits, + net_config); + } + + case IOC_LIBCFS_GET_LNET_STATS: { + struct lnet_ioctl_lnet_stats *lnet_stats = arg; + + if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats)) + return -EINVAL; + + lnet_counters_get(&lnet_stats->st_cntrs); + return 0; + } + + case IOC_LIBCFS_CONFIG_RTR: + config = arg; + + if (config->cfg_hdr.ioc_len < sizeof(*config)) + return -EINVAL; + + mutex_lock(&the_lnet.ln_api_mutex); + if (config->cfg_config_u.cfg_buffers.buf_enable) { + rc = lnet_rtrpools_enable(); + mutex_unlock(&the_lnet.ln_api_mutex); + return rc; + } + lnet_rtrpools_disable(); + mutex_unlock(&the_lnet.ln_api_mutex); + return 0; + + case IOC_LIBCFS_ADD_BUF: + config = arg; + + if (config->cfg_hdr.ioc_len < sizeof(*config)) + return -EINVAL; + + mutex_lock(&the_lnet.ln_api_mutex); + rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.buf_tiny, + config->cfg_config_u.cfg_buffers.buf_small, + config->cfg_config_u.cfg_buffers.buf_large); + mutex_unlock(&the_lnet.ln_api_mutex); + return rc; + + case IOC_LIBCFS_GET_BUF: { + struct lnet_ioctl_pool_cfg *pool_cfg; + size_t total = sizeof(*config) + sizeof(*pool_cfg); + + config = arg; + + if (config->cfg_hdr.ioc_len < total) + return -EINVAL; + + pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk; + return lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg); + } + + case IOC_LIBCFS_GET_PEER_INFO: { + struct lnet_ioctl_peer *peer_info = arg; + + if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info)) + return -EINVAL; + + return lnet_get_peer_info(peer_info->pr_count, + &peer_info->pr_nid, + peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness, + &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt, + 
&peer_info->pr_lnd_u.pr_peer_credits.cr_refcount, + &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits, + &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits, + &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits, + &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_rtr_credits, + &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob); + } + case IOC_LIBCFS_NOTIFY_ROUTER: secs_passed = (ktime_get_real_seconds() - data->ioc_u64[0]); - return lnet_notify(NULL, data->ioc_nid, data->ioc_flags, - jiffies - secs_passed * HZ); + secs_passed *= msecs_to_jiffies(MSEC_PER_SEC); - case IOC_LIBCFS_PORTALS_COMPATIBILITY: - /* This can be removed once lustre stops calling it */ - return 0; + return lnet_notify(NULL, data->ioc_nid, data->ioc_flags, + jiffies - secs_passed); case IOC_LIBCFS_LNET_DIST: rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]); @@ -1382,46 +2032,26 @@ LNetCtl(unsigned int cmd, void *arg) lnet_net_unlock(LNET_LOCK_EX); return 0; + case IOC_LIBCFS_LNET_FAULT: + return lnet_fault_ctl(data->ioc_flags, data); + case IOC_LIBCFS_PING: id.nid = data->ioc_nid; id.pid = data->ioc_u32[0]; rc = lnet_ping(id, data->ioc_u32[1], /* timeout */ - (lnet_process_id_t *)data->ioc_pbuf1, - data->ioc_plen1/sizeof(lnet_process_id_t)); + data->ioc_pbuf1, + data->ioc_plen1 / sizeof(lnet_process_id_t)); if (rc < 0) return rc; data->ioc_count = rc; return 0; - case IOC_LIBCFS_DEBUG_PEER: { - /* CAVEAT EMPTOR: this one designed for calling directly; not - * via an ioctl */ - id = *((lnet_process_id_t *) arg); - - lnet_debug_peer(id.nid); - - ni = lnet_net2ni(LNET_NIDNET(id.nid)); - if (ni == NULL) { - CDEBUG(D_WARNING, "No NI for %s\n", libcfs_id2str(id)); - } else { - if (ni->ni_lnd->lnd_ctl == NULL) { - CDEBUG(D_WARNING, "No ctl for %s\n", - libcfs_id2str(id)); - } else { - (void)ni->ni_lnd->lnd_ctl(ni, cmd, arg); - } - - lnet_ni_decref(ni); - } - return 0; - } - default: ni = lnet_net2ni(data->ioc_net); - if (ni == NULL) + if (!ni) return -EINVAL; - if (ni->ni_lnd->lnd_ctl == NULL) + if (!ni->ni_lnd->lnd_ctl) rc = -EINVAL; else rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg); @@ -1433,6 +2063,12 @@ LNetCtl(unsigned int cmd, void *arg) } EXPORT_SYMBOL(LNetCtl); +void LNetDebugPeer(lnet_process_id_t id) +{ + lnet_debug_peer(id.nid); +} +EXPORT_SYMBOL(LNetDebugPeer); + /** * Retrieve the lnet_process_id_t ID of LNet interface at \a index. Note that * all interfaces share a same PID, as requested by LNetNIInit(). @@ -1452,16 +2088,12 @@ LNetGetId(unsigned int index, lnet_process_id_t *id) int cpt; int rc = -ENOENT; - LASSERT(the_lnet.ln_init); - - /* LNetNI initilization failed? 
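The IOC_LIBCFS_NOTIFY_ROUTER fix below converts "seconds since the event" into jiffies via msecs_to_jiffies(MSEC_PER_SEC), the number of jiffies in one second, instead of multiplying by HZ directly. The computation it performs, restated (the parameter stands in for the kernel conversion):

/* jiffies_per_sec stands in for msecs_to_jiffies(MSEC_PER_SEC). */
static unsigned long when_jiffies(unsigned long now_jiffies,
				  unsigned long secs_passed,
				  unsigned long jiffies_per_sec)
{
	return now_jiffies - secs_passed * jiffies_per_sec;
}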
*/ - if (the_lnet.ln_refcount == 0) - return rc; + LASSERT(the_lnet.ln_refcount > 0); cpt = lnet_net_lock_current(); list_for_each(tmp, &the_lnet.ln_nis) { - if (index-- != 0) + if (index--) continue; ni = list_entry(tmp, lnet_ni_t, ni_list); @@ -1488,192 +2120,8 @@ LNetSnprintHandle(char *str, int len, lnet_handle_any_t h) } EXPORT_SYMBOL(LNetSnprintHandle); -static int -lnet_create_ping_info(void) -{ - int i; - int n; - int rc; - unsigned int infosz; - lnet_ni_t *ni; - lnet_process_id_t id; - lnet_ping_info_t *pinfo; - - for (n = 0; ; n++) { - rc = LNetGetId(n, &id); - if (rc == -ENOENT) - break; - - LASSERT(rc == 0); - } - - infosz = offsetof(lnet_ping_info_t, pi_ni[n]); - LIBCFS_ALLOC(pinfo, infosz); - if (pinfo == NULL) { - CERROR("Can't allocate ping info[%d]\n", n); - return -ENOMEM; - } - - pinfo->pi_nnis = n; - pinfo->pi_pid = the_lnet.ln_pid; - pinfo->pi_magic = LNET_PROTO_PING_MAGIC; - pinfo->pi_features = LNET_PING_FEAT_NI_STATUS; - - for (i = 0; i < n; i++) { - lnet_ni_status_t *ns = &pinfo->pi_ni[i]; - - rc = LNetGetId(i, &id); - LASSERT(rc == 0); - - ns->ns_nid = id.nid; - ns->ns_status = LNET_NI_STATUS_UP; - - lnet_net_lock(0); - - ni = lnet_nid2ni_locked(id.nid, 0); - LASSERT(ni != NULL); - - lnet_ni_lock(ni); - LASSERT(ni->ni_status == NULL); - ni->ni_status = ns; - lnet_ni_unlock(ni); - - lnet_ni_decref_locked(ni, 0); - lnet_net_unlock(0); - } - - the_lnet.ln_ping_info = pinfo; - return 0; -} - -static void -lnet_destroy_ping_info(void) -{ - struct lnet_ni *ni; - - lnet_net_lock(0); - - list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) { - lnet_ni_lock(ni); - ni->ni_status = NULL; - lnet_ni_unlock(ni); - } - - lnet_net_unlock(0); - - LIBCFS_FREE(the_lnet.ln_ping_info, - offsetof(lnet_ping_info_t, - pi_ni[the_lnet.ln_ping_info->pi_nnis])); - the_lnet.ln_ping_info = NULL; -} - -int -lnet_ping_target_init(void) -{ - lnet_md_t md = { NULL }; - lnet_handle_me_t meh; - lnet_process_id_t id; - int rc; - int rc2; - int infosz; - - rc = lnet_create_ping_info(); - if (rc != 0) - return rc; - - /* We can have a tiny EQ since we only need to see the unlink event on - * teardown, which by definition is the last one! 
*/ - rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &the_lnet.ln_ping_target_eq); - if (rc != 0) { - CERROR("Can't allocate ping EQ: %d\n", rc); - goto failed_0; - } - - memset(&id, 0, sizeof(lnet_process_id_t)); - id.nid = LNET_NID_ANY; - id.pid = LNET_PID_ANY; - - rc = LNetMEAttach(LNET_RESERVED_PORTAL, id, - LNET_PROTO_PING_MATCHBITS, 0, - LNET_UNLINK, LNET_INS_AFTER, - &meh); - if (rc != 0) { - CERROR("Can't create ping ME: %d\n", rc); - goto failed_1; - } - - /* initialize md content */ - infosz = offsetof(lnet_ping_info_t, - pi_ni[the_lnet.ln_ping_info->pi_nnis]); - md.start = the_lnet.ln_ping_info; - md.length = infosz; - md.threshold = LNET_MD_THRESH_INF; - md.max_size = 0; - md.options = LNET_MD_OP_GET | LNET_MD_TRUNCATE | - LNET_MD_MANAGE_REMOTE; - md.user_ptr = NULL; - md.eq_handle = the_lnet.ln_ping_target_eq; - - rc = LNetMDAttach(meh, md, - LNET_RETAIN, - &the_lnet.ln_ping_target_md); - if (rc != 0) { - CERROR("Can't attach ping MD: %d\n", rc); - goto failed_2; - } - - return 0; - - failed_2: - rc2 = LNetMEUnlink(meh); - LASSERT(rc2 == 0); - failed_1: - rc2 = LNetEQFree(the_lnet.ln_ping_target_eq); - LASSERT(rc2 == 0); - failed_0: - lnet_destroy_ping_info(); - return rc; -} - -void -lnet_ping_target_fini(void) -{ - lnet_event_t event; - int rc; - int which; - int timeout_ms = 1000; - sigset_t blocked = cfs_block_allsigs(); - - LNetMDUnlink(the_lnet.ln_ping_target_md); - /* NB md could be busy; this just starts the unlink */ - - for (;;) { - rc = LNetEQPoll(&the_lnet.ln_ping_target_eq, 1, - timeout_ms, &event, &which); - - /* I expect overflow... */ - LASSERT(rc >= 0 || rc == -EOVERFLOW); - - if (rc == 0) { - /* timed out: provide a diagnostic */ - CWARN("Still waiting for ping MD to unlink\n"); - timeout_ms *= 2; - continue; - } - - /* Got a valid event */ - if (event.unlinked) - break; - } - - rc = LNetEQFree(the_lnet.ln_ping_target_eq); - LASSERT(rc == 0); - lnet_destroy_ping_info(); - cfs_restore_sigs(blocked); -} - -int -lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_ids) +static int lnet_ping(lnet_process_id_t id, int timeout_ms, + lnet_process_id_t __user *ids, int n_ids) { lnet_handle_eq_t eqh; lnet_handle_md_t mdh; @@ -1683,7 +2131,7 @@ lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_id int unlinked = 0; int replied = 0; const int a_long_time = 60000; /* mS */ - int infosz = offsetof(lnet_ping_info_t, pi_ni[n_ids]); + int infosz; lnet_ping_info_t *info; lnet_process_id_t tmpid; int i; @@ -1692,6 +2140,8 @@ lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_id int rc2; sigset_t blocked; + infosz = offsetof(lnet_ping_info_t, pi_ni[n_ids]); + if (n_ids <= 0 || id.nid == LNET_NID_ANY || timeout_ms > 500000 || /* arbitrary limit! 
*/ @@ -1699,15 +2149,15 @@ lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_id return -EINVAL; if (id.pid == LNET_PID_ANY) - id.pid = LUSTRE_SRV_LNET_PID; + id.pid = LNET_PID_LUSTRE; LIBCFS_ALLOC(info, infosz); - if (info == NULL) + if (!info) return -ENOMEM; /* NB 2 events max (including any unlink event) */ rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh); - if (rc != 0) { + if (rc) { CERROR("Can't allocate EQ: %d\n", rc); goto out_0; } @@ -1722,7 +2172,7 @@ lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_id md.eq_handle = eqh; rc = LNetMDBind(md, LNET_UNLINK, &mdh); - if (rc != 0) { + if (rc) { CERROR("Can't bind MD: %d\n", rc); goto out_1; } @@ -1731,11 +2181,11 @@ lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_id LNET_RESERVED_PORTAL, LNET_PROTO_PING_MATCHBITS, 0); - if (rc != 0) { + if (rc) { /* Don't CERROR; this could be deliberate! */ rc2 = LNetMDUnlink(mdh); - LASSERT(rc2 == 0); + LASSERT(!rc2); /* NB must wait for the UNLINK event below... */ unlinked = 1; @@ -1759,11 +2209,11 @@ lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_id LASSERT(rc2 != -EOVERFLOW); /* can't miss anything */ - if (rc2 <= 0 || event.status != 0) { + if (rc2 <= 0 || event.status) { /* timeout or error */ - if (!replied && rc == 0) + if (!replied && !rc) rc = (rc2 < 0) ? rc2 : - (rc2 == 0) ? -ETIMEDOUT : + !rc2 ? -ETIMEDOUT : event.status; if (!unlinked) { @@ -1772,7 +2222,7 @@ lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_id /* No assertion (racing with network) */ unlinked = 1; timeout_ms = a_long_time; - } else if (rc2 == 0) { + } else if (!rc2) { /* timed out waiting for unlink */ CWARN("ping %s: late network completion\n", libcfs_id2str(id)); @@ -1812,7 +2262,7 @@ lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_id goto out_1; } - if ((info->pi_features & LNET_PING_FEAT_NI_STATUS) == 0) { + if (!(info->pi_features & LNET_PING_FEAT_NI_STATUS)) { CERROR("%s: ping w/o NI status: 0x%x\n", libcfs_id2str(id), info->pi_features); goto out_1; @@ -1846,9 +2296,9 @@ lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_id out_1: rc2 = LNetEQFree(eqh); - if (rc2 != 0) + if (rc2) CERROR("rc2 %d\n", rc2); - LASSERT(rc2 == 0); + LASSERT(!rc2); out_0: LIBCFS_FREE(info, infosz); diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c index 284a3c271..449069c9e 100644 --- a/drivers/staging/lustre/lnet/lnet/config.c +++ b/drivers/staging/lustre/lnet/lnet/config.c @@ -37,15 +37,15 @@ #define DEBUG_SUBSYSTEM S_LNET #include "../../include/linux/lnet/lib-lnet.h" -struct lnet_text_buf_t { /* tmp struct for parsing routes */ +struct lnet_text_buf { /* tmp struct for parsing routes */ struct list_head ltb_list; /* stash on lists */ int ltb_size; /* allocated size */ char ltb_text[0]; /* text buffer */ }; static int lnet_tbnob; /* track text buf allocation */ -#define LNET_MAX_TEXTBUF_NOB (64<<10) /* bound allocation */ -#define LNET_SINGLE_TEXTBUF_NOB (4<<10) +#define LNET_MAX_TEXTBUF_NOB (64 << 10) /* bound allocation */ +#define LNET_SINGLE_TEXTBUF_NOB (4 << 10) static void lnet_syntax(char *name, char *str, int offset, int width) @@ -54,9 +54,9 @@ lnet_syntax(char *name, char *str, int offset, int width) static char dashes[LNET_SINGLE_TEXTBUF_NOB]; memset(dots, '.', sizeof(dots)); - dots[sizeof(dots)-1] = 0; + dots[sizeof(dots) - 1] = 0; memset(dashes, '-', sizeof(dashes)); - 
dashes[sizeof(dashes)-1] = 0; + dashes[sizeof(dashes) - 1] = 0; LCONSOLE_ERROR_MSG(0x10f, "Error parsing '%s=\"%s\"'\n", name, str); LCONSOLE_ERROR_MSG(0x110, "here...........%.*s..%.*s|%.*s|\n", @@ -77,7 +77,7 @@ lnet_issep(char c) } } -static int +int lnet_net_unique(__u32 net, struct list_head *nilist) { struct list_head *tmp; @@ -96,19 +96,25 @@ lnet_net_unique(__u32 net, struct list_head *nilist) void lnet_ni_free(struct lnet_ni *ni) { - if (ni->ni_refs != NULL) + int i; + + if (ni->ni_refs) cfs_percpt_free(ni->ni_refs); - if (ni->ni_tx_queues != NULL) + if (ni->ni_tx_queues) cfs_percpt_free(ni->ni_tx_queues); - if (ni->ni_cpts != NULL) + if (ni->ni_cpts) cfs_expr_list_values_free(ni->ni_cpts, ni->ni_ncpts); + for (i = 0; i < LNET_MAX_INTERFACES && ni->ni_interfaces[i]; i++) { + LIBCFS_FREE(ni->ni_interfaces[i], + strlen(ni->ni_interfaces[i]) + 1); + } LIBCFS_FREE(ni, sizeof(*ni)); } -static lnet_ni_t * +lnet_ni_t * lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist) { struct lnet_tx_queue *tq; @@ -123,7 +129,7 @@ lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist) } LIBCFS_ALLOC(ni, sizeof(*ni)); - if (ni == NULL) { + if (!ni) { CERROR("Out of memory creating network %s\n", libcfs_net2str(net)); return NULL; @@ -133,18 +139,18 @@ lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist) INIT_LIST_HEAD(&ni->ni_cptlist); ni->ni_refs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*ni->ni_refs[0])); - if (ni->ni_refs == NULL) + if (!ni->ni_refs) goto failed; ni->ni_tx_queues = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*ni->ni_tx_queues[0])); - if (ni->ni_tx_queues == NULL) + if (!ni->ni_tx_queues) goto failed; cfs_percpt_for_each(tq, i, ni->ni_tx_queues) INIT_LIST_HEAD(&tq->tq_delayed); - if (el == NULL) { + if (!el) { ni->ni_cpts = NULL; ni->ni_ncpts = LNET_CPT_NUMBER; } else { @@ -178,13 +184,19 @@ int lnet_parse_networks(struct list_head *nilist, char *networks) { struct cfs_expr_list *el = NULL; - int tokensize = strlen(networks) + 1; + int tokensize; char *tokens; char *str; char *tmp; struct lnet_ni *ni; __u32 net; int nnets = 0; + struct list_head *temp_node; + + if (!networks) { + CERROR("networks string is undefined\n"); + return -EINVAL; + } if (strlen(networks) > LNET_SINGLE_TEXTBUF_NOB) { /* _WAY_ conservative */ @@ -193,23 +205,19 @@ lnet_parse_networks(struct list_head *nilist, char *networks) return -EINVAL; } + tokensize = strlen(networks) + 1; + LIBCFS_ALLOC(tokens, tokensize); - if (tokens == NULL) { + if (!tokens) { CERROR("Can't allocate net tokens\n"); return -ENOMEM; } - the_lnet.ln_network_tokens = tokens; - the_lnet.ln_network_tokens_nob = tokensize; memcpy(tokens, networks, tokensize); - str = tmp = tokens; - - /* Add in the loopback network */ - ni = lnet_ni_alloc(LNET_MKNET(LOLND, 0), NULL, nilist); - if (ni == NULL) - goto failed; + tmp = tokens; + str = tokens; - while (str != NULL && *str != 0) { + while (str && *str) { char *comma = strchr(str, ','); char *bracket = strchr(str, '('); char *square = strchr(str, '['); @@ -217,26 +225,29 @@ lnet_parse_networks(struct list_head *nilist, char *networks) int niface; int rc; - /* NB we don't check interface conflicts here; it's the LNDs - * responsibility (if it cares at all) */ - - if (square != NULL && (comma == NULL || square < comma)) { - /* i.e: o2ib0(ib0)[1,2], number between square - * brackets are CPTs this NI needs to be bond */ - if (bracket != NULL && bracket > square) { + /* + * NB we don't check interface conflicts here; it's the LNDs + 
* responsibility (if it cares at all) + */ + if (square && (!comma || square < comma)) { + /* + * e.g.: o2ib0(ib0)[1,2], numbers between square + * brackets are CPTs this NI needs to be bound + */ + if (bracket && bracket > square) { tmp = square; goto failed_syntax; } tmp = strchr(square, ']'); - if (tmp == NULL) { + if (!tmp) { tmp = square; goto failed_syntax; } rc = cfs_expr_list_parse(square, tmp - square + 1, 0, LNET_CPT_NUMBER - 1, &el); - if (rc != 0) { + if (rc) { tmp = square; goto failed_syntax; } @@ -245,12 +256,10 @@ *square++ = ' '; } - if (bracket == NULL || - (comma != NULL && comma < bracket)) { - + if (!bracket || (comma && comma < bracket)) { /* no interface list specified */ - if (comma != NULL) + if (comma) *comma++ = 0; net = libcfs_str2net(cfs_trimwhite(str)); @@ -262,10 +271,10 @@ } if (LNET_NETTYP(net) != LOLND && /* LO is implicit */ - lnet_ni_alloc(net, el, nilist) == NULL) + !lnet_ni_alloc(net, el, nilist)) goto failed; - if (el != NULL) { + if (el) { cfs_expr_list_free(el); el = NULL; } @@ -281,12 +290,11 @@ goto failed_syntax; } - nnets++; ni = lnet_ni_alloc(net, el, nilist); - if (ni == NULL) + if (!ni) goto failed; - if (el != NULL) { + if (el) { cfs_expr_list_free(el); el = NULL; } @@ -295,7 +303,7 @@ iface = bracket + 1; bracket = strchr(iface, ')'); - if (bracket == NULL) { + if (!bracket) { tmp = iface; goto failed_syntax; } @@ -303,11 +311,11 @@ *bracket = 0; do { comma = strchr(iface, ','); - if (comma != NULL) + if (comma) *comma++ = 0; iface = cfs_trimwhite(iface); - if (*iface == 0) { + if (!*iface) { tmp = iface; goto failed_syntax; } @@ -319,16 +327,32 @@ goto failed; } - ni->ni_interfaces[niface++] = iface; + /* + * Allocate a separate piece of memory and copy + * into it the string, so we don't have + * a dependency on the tokens string. This way we + * can free the tokens at the end of the function.
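
/*
 * The copy performed just below is effectively a string duplication: the
 * buffer is strlen(iface) + 1 bytes, LIBCFS_ALLOC() returns it zeroed, and
 * strncpy() fills every byte except the last, which therefore remains the
 * terminating '\0'. In plain kernel style the same step is a one-liner;
 * a sketch, assuming process context where GFP_KERNEL may sleep:
 */
char *dup = kstrdup(iface, GFP_KERNEL);	/* NULL on allocation failure */

if (!dup)
	goto failed;
ni->ni_interfaces[niface++] = dup;	/* NB: pair with kfree(), not LIBCFS_FREE() */
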
+ * The newly allocated ni_interfaces[] can be + * freed when freeing the NI + */ + LIBCFS_ALLOC(ni->ni_interfaces[niface], + strlen(iface) + 1); + if (!ni->ni_interfaces[niface]) { + CERROR("Can't allocate net interface name\n"); + goto failed; + } + strncpy(ni->ni_interfaces[niface], iface, + strlen(iface)); + niface++; iface = comma; - } while (iface != NULL); + } while (iface); str = bracket + 1; comma = strchr(bracket + 1, ','); - if (comma != NULL) { + if (comma) { *comma = 0; str = cfs_trimwhite(str); - if (*str != 0) { + if (*str) { tmp = str; goto failed_syntax; } @@ -337,14 +361,17 @@ lnet_parse_networks(struct list_head *nilist, char *networks) } str = cfs_trimwhite(str); - if (*str != 0) { + if (*str) { tmp = str; goto failed_syntax; } } - LASSERT(!list_empty(nilist)); - return 0; + list_for_each(temp_node, nilist) + nnets++; + + LIBCFS_FREE(tokens, tokensize); + return nnets; failed_syntax: lnet_syntax("networks", networks, (int)(tmp - tokens), strlen(tmp)); @@ -356,23 +383,22 @@ lnet_parse_networks(struct list_head *nilist, char *networks) lnet_ni_free(ni); } - if (el != NULL) + if (el) cfs_expr_list_free(el); LIBCFS_FREE(tokens, tokensize); - the_lnet.ln_network_tokens = NULL; return -EINVAL; } -static struct lnet_text_buf_t * +static struct lnet_text_buf * lnet_new_text_buf(int str_len) { - struct lnet_text_buf_t *ltb; + struct lnet_text_buf *ltb; int nob; /* NB allocate space for the terminating 0 */ - nob = offsetof(struct lnet_text_buf_t, ltb_text[str_len + 1]); + nob = offsetof(struct lnet_text_buf, ltb_text[str_len + 1]); if (nob > LNET_SINGLE_TEXTBUF_NOB) { /* _way_ conservative for "route net gateway..." */ CERROR("text buffer too big\n"); @@ -385,7 +411,7 @@ lnet_new_text_buf(int str_len) } LIBCFS_ALLOC(ltb, nob); - if (ltb == NULL) + if (!ltb) return NULL; ltb->ltb_size = nob; @@ -395,7 +421,7 @@ lnet_new_text_buf(int str_len) } static void -lnet_free_text_buf(struct lnet_text_buf_t *ltb) +lnet_free_text_buf(struct lnet_text_buf *ltb) { lnet_tbnob -= ltb->ltb_size; LIBCFS_FREE(ltb, ltb->ltb_size); @@ -404,10 +430,10 @@ lnet_free_text_buf(struct lnet_text_buf_t *ltb) static void lnet_free_text_bufs(struct list_head *tbs) { - struct lnet_text_buf_t *ltb; + struct lnet_text_buf *ltb; while (!list_empty(tbs)) { - ltb = list_entry(tbs->next, struct lnet_text_buf_t, ltb_list); + ltb = list_entry(tbs->next, struct lnet_text_buf, ltb_list); list_del(<b->ltb_list); lnet_free_text_buf(ltb); @@ -421,7 +447,7 @@ lnet_str2tbs_sep(struct list_head *tbs, char *str) char *sep; int nob; int i; - struct lnet_text_buf_t *ltb; + struct lnet_text_buf *ltb; INIT_LIST_HEAD(&pending); @@ -432,16 +458,16 @@ lnet_str2tbs_sep(struct list_head *tbs, char *str) str++; /* scan for separator or comment */ - for (sep = str; *sep != 0; sep++) + for (sep = str; *sep; sep++) if (lnet_issep(*sep) || *sep == '#') break; nob = (int)(sep - str); if (nob > 0) { ltb = lnet_new_text_buf(nob); - if (ltb == NULL) { + if (!ltb) { lnet_free_text_bufs(&pending); - return -1; + return -ENOMEM; } for (i = 0; i < nob; i++) @@ -459,10 +485,10 @@ lnet_str2tbs_sep(struct list_head *tbs, char *str) /* scan for separator */ do { sep++; - } while (*sep != 0 && !lnet_issep(*sep)); + } while (*sep && !lnet_issep(*sep)); } - if (*sep == 0) + if (!*sep) break; str = sep + 1; @@ -479,18 +505,18 @@ lnet_expand1tb(struct list_head *list, { int len1 = (int)(sep1 - str); int len2 = strlen(sep2 + 1); - struct lnet_text_buf_t *ltb; + struct lnet_text_buf *ltb; LASSERT(*sep1 == '['); LASSERT(*sep2 == ']'); ltb = 
lnet_new_text_buf(len1 + itemlen + len2); - if (ltb == NULL) + if (!ltb) return -ENOMEM; memcpy(ltb->ltb_text, str, len1); memcpy(<b->ltb_text[len1], item, itemlen); - memcpy(<b->ltb_text[len1+itemlen], sep2 + 1, len2); + memcpy(<b->ltb_text[len1 + itemlen], sep2 + 1, len2); ltb->ltb_text[len1 + itemlen + len2] = 0; list_add_tail(<b->ltb_list, list); @@ -516,15 +542,14 @@ lnet_str2tbs_expand(struct list_head *tbs, char *str) INIT_LIST_HEAD(&pending); sep = strchr(str, '['); - if (sep == NULL) /* nothing to expand */ + if (!sep) /* nothing to expand */ return 0; sep2 = strchr(sep, ']'); - if (sep2 == NULL) + if (!sep2) goto failed; for (parsed = sep; parsed < sep2; parsed = enditem) { - enditem = ++parsed; while (enditem < sep2 && *enditem != ',') enditem++; @@ -534,17 +559,13 @@ lnet_str2tbs_expand(struct list_head *tbs, char *str) if (sscanf(parsed, "%d-%d/%d%n", &lo, &hi, &stride, &scanned) < 3) { - if (sscanf(parsed, "%d-%d%n", &lo, &hi, &scanned) < 2) { - /* simple string enumeration */ - if (lnet_expand1tb( - &pending, str, sep, sep2, - parsed, - (int)(enditem - parsed)) != 0) { + if (lnet_expand1tb(&pending, str, sep, sep2, + parsed, + (int)(enditem - parsed))) { goto failed; } - continue; } @@ -557,18 +578,17 @@ lnet_str2tbs_expand(struct list_head *tbs, char *str) goto failed; if (hi < 0 || lo < 0 || stride < 0 || hi < lo || - (hi - lo) % stride != 0) + (hi - lo) % stride) goto failed; for (i = lo; i <= hi; i += stride) { - snprintf(num, sizeof(num), "%d", i); nob = strlen(num); if (nob + 1 == sizeof(num)) goto failed; if (lnet_expand1tb(&pending, str, sep, sep2, - num, nob) != 0) + num, nob)) goto failed; } } @@ -578,7 +598,7 @@ lnet_str2tbs_expand(struct list_head *tbs, char *str) failed: lnet_free_text_bufs(&pending); - return -1; + return -EINVAL; } static int @@ -602,17 +622,19 @@ lnet_parse_priority(char *str, unsigned int *priority, char **token) int len; sep = strchr(str, LNET_PRIORITY_SEPARATOR); - if (sep == NULL) { + if (!sep) { *priority = 0; return 0; } len = strlen(sep + 1); - if ((sscanf((sep+1), "%u%n", priority, &nob) < 1) || (len != nob)) { - /* Update the caller's token pointer so it treats the found - priority as the token to report in the error message. */ + if ((sscanf((sep + 1), "%u%n", priority, &nob) < 1) || (len != nob)) { + /* + * Update the caller's token pointer so it treats the found + * priority as the token to report in the error message. + */ *token += sep - str + 1; - return -1; + return -EINVAL; } CDEBUG(D_NET, "gateway %s, priority %d, nob %d\n", str, *priority, nob); @@ -636,13 +658,13 @@ lnet_parse_route(char *str, int *im_a_router) struct list_head *tmp2; __u32 net; lnet_nid_t nid; - struct lnet_text_buf_t *ltb; + struct lnet_text_buf *ltb; int rc; char *sep; char *token = str; int ntokens = 0; int myrc = -1; - unsigned int hops; + __u32 hops; int got_hops = 0; unsigned int priority = 0; @@ -658,7 +680,7 @@ lnet_parse_route(char *str, int *im_a_router) /* scan for token start */ while (isspace(*sep)) sep++; - if (*sep == 0) { + if (!*sep) { if (ntokens < (got_hops ? 
3 : 2)) goto token_error; break; @@ -668,9 +690,9 @@ lnet_parse_route(char *str, int *im_a_router) token = sep++; /* scan for token end */ - while (*sep != 0 && !isspace(*sep)) + while (*sep && !isspace(*sep)) sep++; - if (*sep != 0) + if (*sep) *sep++ = 0; if (ntokens == 1) { @@ -684,7 +706,7 @@ lnet_parse_route(char *str, int *im_a_router) } ltb = lnet_new_text_buf(strlen(token)); - if (ltb == NULL) + if (!ltb) goto out; strcpy(ltb->ltb_text, token); @@ -692,8 +714,7 @@ lnet_parse_route(char *str, int *im_a_router) list_add_tail(tmp1, tmp2); while (tmp1 != tmp2) { - ltb = list_entry(tmp1, struct lnet_text_buf_t, - ltb_list); + ltb = list_entry(tmp1, struct lnet_text_buf, ltb_list); rc = lnet_str2tbs_expand(tmp1->next, ltb->ltb_text); if (rc < 0) @@ -726,20 +747,23 @@ lnet_parse_route(char *str, int *im_a_router) } } + /** + * if there are no hops set then we want to flag this value as + * unset since hops is an optional parameter + */ if (!got_hops) - hops = 1; + hops = LNET_UNDEFINED_HOPS; LASSERT(!list_empty(&nets)); LASSERT(!list_empty(&gateways)); list_for_each(tmp1, &nets) { - ltb = list_entry(tmp1, struct lnet_text_buf_t, ltb_list); + ltb = list_entry(tmp1, struct lnet_text_buf, ltb_list); net = libcfs_str2net(ltb->ltb_text); LASSERT(net != LNET_NIDNET(LNET_NID_ANY)); list_for_each(tmp2, &gateways) { - ltb = list_entry(tmp2, struct lnet_text_buf_t, - ltb_list); + ltb = list_entry(tmp2, struct lnet_text_buf, ltb_list); nid = libcfs_str2nid(ltb->ltb_text); LASSERT(nid != LNET_NID_ANY); @@ -749,7 +773,7 @@ lnet_parse_route(char *str, int *im_a_router) } rc = lnet_add_route(net, hops, nid, priority); - if (rc != 0) { + if (rc && rc != -EEXIST && rc != -EHOSTUNREACH) { CERROR("Can't create route to %s via %s\n", libcfs_net2str(net), libcfs_nid2str(nid)); @@ -772,10 +796,10 @@ lnet_parse_route(char *str, int *im_a_router) static int lnet_parse_route_tbs(struct list_head *tbs, int *im_a_router) { - struct lnet_text_buf_t *ltb; + struct lnet_text_buf *ltb; while (!list_empty(tbs)) { - ltb = list_entry(tbs->next, struct lnet_text_buf_t, ltb_list); + ltb = list_entry(tbs->next, struct lnet_text_buf, ltb_list); if (lnet_parse_route(ltb->ltb_text, im_a_router) < 0) { lnet_free_text_bufs(tbs); @@ -806,7 +830,7 @@ lnet_parse_routes(char *routes, int *im_a_router) rc = lnet_parse_route_tbs(&tbs, im_a_router); } - LASSERT(lnet_tbnob == 0); + LASSERT(!lnet_tbnob); return rc; } @@ -818,7 +842,7 @@ lnet_match_network_token(char *token, int len, __u32 *ipaddrs, int nip) int i; rc = cfs_ip_addr_parse(token, len, &list); - if (rc != 0) + if (rc) return rc; for (rc = i = 0; !rc && i < nip; i++) @@ -851,18 +875,18 @@ lnet_match_network_tokens(char *net_entry, __u32 *ipaddrs, int nip) /* scan for token start */ while (isspace(*sep)) sep++; - if (*sep == 0) + if (!*sep) break; token = sep++; /* scan for token end */ - while (*sep != 0 && !isspace(*sep)) + while (*sep && !isspace(*sep)) sep++; - if (*sep != 0) + if (*sep) *sep++ = 0; - if (ntokens++ == 0) { + if (!ntokens++) { net = token; continue; } @@ -876,7 +900,8 @@ lnet_match_network_tokens(char *net_entry, __u32 *ipaddrs, int nip) return rc; } - matched |= (rc != 0); + if (rc) + matched |= 1; } if (!matched) @@ -892,12 +917,12 @@ lnet_netspec2net(char *netspec) char *bracket = strchr(netspec, '('); __u32 net; - if (bracket != NULL) + if (bracket) *bracket = 0; net = libcfs_str2net(netspec); - if (bracket != NULL) + if (bracket) *bracket = '('; return net; @@ -909,8 +934,8 @@ lnet_splitnets(char *source, struct list_head *nets) int offset = 0; int 
offset2; int len; - struct lnet_text_buf_t *tb; - struct lnet_text_buf_t *tb2; + struct lnet_text_buf *tb; + struct lnet_text_buf *tb2; struct list_head *t; char *sep; char *bracket; @@ -919,15 +944,13 @@ lnet_splitnets(char *source, struct list_head *nets) LASSERT(!list_empty(nets)); LASSERT(nets->next == nets->prev); /* single entry */ - tb = list_entry(nets->next, struct lnet_text_buf_t, ltb_list); + tb = list_entry(nets->next, struct lnet_text_buf, ltb_list); for (;;) { sep = strchr(tb->ltb_text, ','); bracket = strchr(tb->ltb_text, '('); - if (sep != NULL && - bracket != NULL && - bracket < sep) { + if (sep && bracket && bracket < sep) { /* netspec lists interfaces... */ offset2 = offset + (int)(bracket - tb->ltb_text); @@ -935,16 +958,16 @@ lnet_splitnets(char *source, struct list_head *nets) bracket = strchr(bracket + 1, ')'); - if (bracket == NULL || - !(bracket[1] == ',' || bracket[1] == 0)) { + if (!bracket || + !(bracket[1] == ',' || !bracket[1])) { lnet_syntax("ip2nets", source, offset2, len); return -EINVAL; } - sep = (bracket[1] == 0) ? NULL : bracket + 1; + sep = !bracket[1] ? NULL : bracket + 1; } - if (sep != NULL) + if (sep) *sep++ = 0; net = lnet_netspec2net(tb->ltb_text); @@ -955,7 +978,7 @@ lnet_splitnets(char *source, struct list_head *nets) } list_for_each(t, nets) { - tb2 = list_entry(t, struct lnet_text_buf_t, ltb_list); + tb2 = list_entry(t, struct lnet_text_buf, ltb_list); if (tb2 == tb) continue; @@ -968,13 +991,13 @@ lnet_splitnets(char *source, struct list_head *nets) } } - if (sep == NULL) + if (!sep) return 0; offset += (int)(sep - tb->ltb_text); len = strlen(sep); tb2 = lnet_new_text_buf(len); - if (tb2 == NULL) + if (!tb2) return -ENOMEM; strncpy(tb2->ltb_text, sep, len); @@ -996,8 +1019,9 @@ lnet_match_networks(char **networksp, char *ip2nets, __u32 *ipaddrs, int nip) struct list_head current_nets; struct list_head *t; struct list_head *t2; - struct lnet_text_buf_t *tb; - struct lnet_text_buf_t *tb2; + struct lnet_text_buf *tb; + struct lnet_text_buf *temp; + struct lnet_text_buf *tb2; __u32 net1; __u32 net2; int len; @@ -1008,7 +1032,7 @@ lnet_match_networks(char **networksp, char *ip2nets, __u32 *ipaddrs, int nip) INIT_LIST_HEAD(&raw_entries); if (lnet_str2tbs_sep(&raw_entries, ip2nets) < 0) { CERROR("Error parsing ip2nets\n"); - LASSERT(lnet_tbnob == 0); + LASSERT(!lnet_tbnob); return -EINVAL; } @@ -1019,12 +1043,9 @@ lnet_match_networks(char **networksp, char *ip2nets, __u32 *ipaddrs, int nip) len = 0; rc = 0; - while (!list_empty(&raw_entries)) { - tb = list_entry(raw_entries.next, struct lnet_text_buf_t, - ltb_list); - + list_for_each_entry_safe(tb, temp, &raw_entries, ltb_list) { strncpy(source, tb->ltb_text, sizeof(source)); - source[sizeof(source)-1] = '\0'; + source[sizeof(source) - 1] = '\0'; /* replace ltb_text with the network(s) add on match */ rc = lnet_match_network_tokens(tb->ltb_text, ipaddrs, nip); @@ -1033,7 +1054,7 @@ lnet_match_networks(char **networksp, char *ip2nets, __u32 *ipaddrs, int nip) list_del(&tb->ltb_list); - if (rc == 0) { /* no match */ + if (!rc) { /* no match */ lnet_free_text_buf(tb); continue; } @@ -1047,13 +1068,13 @@ lnet_match_networks(char **networksp, char *ip2nets, __u32 *ipaddrs, int nip) dup = 0; list_for_each(t, ¤t_nets) { - tb = list_entry(t, struct lnet_text_buf_t, ltb_list); + tb = list_entry(t, struct lnet_text_buf, ltb_list); net1 = lnet_netspec2net(tb->ltb_text); LASSERT(net1 != LNET_NIDNET(LNET_NID_ANY)); list_for_each(t2, &matched_nets) { - tb2 = list_entry(t2, struct lnet_text_buf_t, - ltb_list); + 
tb2 = list_entry(t2, struct lnet_text_buf, + ltb_list); net2 = lnet_netspec2net(tb2->ltb_text); LASSERT(net2 != LNET_NIDNET(LNET_NID_ANY)); @@ -1073,13 +1094,13 @@ lnet_match_networks(char **networksp, char *ip2nets, __u32 *ipaddrs, int nip) } list_for_each_safe(t, t2, ¤t_nets) { - tb = list_entry(t, struct lnet_text_buf_t, ltb_list); + tb = list_entry(t, struct lnet_text_buf, ltb_list); list_del(&tb->ltb_list); list_add_tail(&tb->ltb_list, &matched_nets); len += snprintf(networks + len, sizeof(networks) - len, - "%s%s", (len == 0) ? "" : ",", + "%s%s", !len ? "" : ",", tb->ltb_text); if (len >= sizeof(networks)) { @@ -1096,7 +1117,7 @@ lnet_match_networks(char **networksp, char *ip2nets, __u32 *ipaddrs, int nip) lnet_free_text_bufs(&raw_entries); lnet_free_text_bufs(&matched_nets); lnet_free_text_bufs(¤t_nets); - LASSERT(lnet_tbnob == 0); + LASSERT(!lnet_tbnob); if (rc < 0) return rc; @@ -1122,7 +1143,7 @@ lnet_ipaddr_enumerate(__u32 **ipaddrsp) return nif; LIBCFS_ALLOC(ipaddrs, nif * sizeof(*ipaddrs)); - if (ipaddrs == NULL) { + if (!ipaddrs) { CERROR("Can't allocate ipaddrs[%d]\n", nif); lnet_ipif_free_enumeration(ifnames, nif); return -ENOMEM; @@ -1133,7 +1154,7 @@ lnet_ipaddr_enumerate(__u32 **ipaddrsp) continue; rc = lnet_ipif_query(ifnames[i], &up, &ipaddrs[nip], &netmask); - if (rc != 0) { + if (rc) { CWARN("Can't query interface %s: %d\n", ifnames[i], rc); continue; @@ -1155,7 +1176,7 @@ lnet_ipaddr_enumerate(__u32 **ipaddrsp) } else { if (nip > 0) { LIBCFS_ALLOC(ipaddrs2, nip * sizeof(*ipaddrs2)); - if (ipaddrs2 == NULL) { + if (!ipaddrs2) { CERROR("Can't allocate ipaddrs[%d]\n", nip); nip = -ENOMEM; } else { @@ -1184,7 +1205,7 @@ lnet_parse_ip2nets(char **networksp, char *ip2nets) return nip; } - if (nip == 0) { + if (!nip) { LCONSOLE_ERROR_MSG(0x118, "No local IP interfaces for ip2nets to match\n"); return -ENOENT; @@ -1198,7 +1219,7 @@ lnet_parse_ip2nets(char **networksp, char *ip2nets) return rc; } - if (rc == 0) { + if (!rc) { LCONSOLE_ERROR_MSG(0x11a, "ip2nets does not match any local IP interfaces\n"); return -ENOENT; diff --git a/drivers/staging/lustre/lnet/lnet/lib-eq.c b/drivers/staging/lustre/lnet/lnet/lib-eq.c index 64f94a690..adbcadbab 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-eq.c +++ b/drivers/staging/lustre/lnet/lnet/lib-eq.c @@ -72,33 +72,38 @@ LNetEQAlloc(unsigned int count, lnet_eq_handler_t callback, { lnet_eq_t *eq; - LASSERT(the_lnet.ln_init); LASSERT(the_lnet.ln_refcount > 0); - /* We need count to be a power of 2 so that when eq_{enq,deq}_seq + /* + * We need count to be a power of 2 so that when eq_{enq,deq}_seq * overflow, they don't skip entries, so the queue has the same - * apparent capacity at all times */ + * apparent capacity at all times + */ + if (count) + count = roundup_pow_of_two(count); - count = roundup_pow_of_two(count); - - if (callback != LNET_EQ_HANDLER_NONE && count != 0) + if (callback != LNET_EQ_HANDLER_NONE && count) CWARN("EQ callback is guaranteed to get every event, do you still want to set eqcount %d for polling event which will have locking overhead? 
Please contact with developer to confirm\n", count); - /* count can be 0 if only need callback, we can eliminate - * overhead of enqueue event */ - if (count == 0 && callback == LNET_EQ_HANDLER_NONE) + /* + * count can be 0 if only need callback, we can eliminate + * overhead of enqueue event + */ + if (!count && callback == LNET_EQ_HANDLER_NONE) return -EINVAL; eq = lnet_eq_alloc(); - if (eq == NULL) + if (!eq) return -ENOMEM; - if (count != 0) { + if (count) { LIBCFS_ALLOC(eq->eq_events, count * sizeof(lnet_event_t)); - if (eq->eq_events == NULL) + if (!eq->eq_events) goto failed; - /* NB allocator has set all event sequence numbers to 0, - * so all them should be earlier than eq_deq_seq */ + /* + * NB allocator has set all event sequence numbers to 0, + * so all them should be earlier than eq_deq_seq + */ } eq->eq_deq_seq = 1; @@ -108,13 +113,15 @@ LNetEQAlloc(unsigned int count, lnet_eq_handler_t callback, eq->eq_refs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*eq->eq_refs[0])); - if (eq->eq_refs == NULL) + if (!eq->eq_refs) goto failed; /* MUST hold both exclusive lnet_res_lock */ lnet_res_lock(LNET_LOCK_EX); - /* NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do - * both EQ lookup and poll event with only lnet_eq_wait_lock */ + /* + * NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do + * both EQ lookup and poll event with only lnet_eq_wait_lock + */ lnet_eq_wait_lock(); lnet_res_lh_initialize(&the_lnet.ln_eq_container, &eq->eq_lh); @@ -127,10 +134,10 @@ LNetEQAlloc(unsigned int count, lnet_eq_handler_t callback, return 0; failed: - if (eq->eq_events != NULL) + if (eq->eq_events) LIBCFS_FREE(eq->eq_events, count * sizeof(lnet_event_t)); - if (eq->eq_refs != NULL) + if (eq->eq_refs) cfs_percpt_free(eq->eq_refs); lnet_eq_free(eq); @@ -159,23 +166,24 @@ LNetEQFree(lnet_handle_eq_t eqh) int size = 0; int i; - LASSERT(the_lnet.ln_init); LASSERT(the_lnet.ln_refcount > 0); lnet_res_lock(LNET_LOCK_EX); - /* NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do - * both EQ lookup and poll event with only lnet_eq_wait_lock */ + /* + * NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do + * both EQ lookup and poll event with only lnet_eq_wait_lock + */ lnet_eq_wait_lock(); eq = lnet_handle2eq(&eqh); - if (eq == NULL) { + if (!eq) { rc = -ENOENT; goto out; } cfs_percpt_for_each(ref, i, eq->eq_refs) { LASSERT(*ref >= 0); - if (*ref == 0) + if (!*ref) continue; CDEBUG(D_NET, "Event equeue (%d: %d) busy on destroy.\n", @@ -196,9 +204,9 @@ LNetEQFree(lnet_handle_eq_t eqh) lnet_eq_wait_unlock(); lnet_res_unlock(LNET_LOCK_EX); - if (events != NULL) + if (events) LIBCFS_FREE(events, size * sizeof(lnet_event_t)); - if (refs != NULL) + if (refs) cfs_percpt_free(refs); return rc; @@ -211,7 +219,7 @@ lnet_eq_enqueue_event(lnet_eq_t *eq, lnet_event_t *ev) /* MUST called with resource lock hold but w/o lnet_eq_wait_lock */ int index; - if (eq->eq_size == 0) { + if (!eq->eq_size) { LASSERT(eq->eq_callback != LNET_EQ_HANDLER_NONE); eq->eq_callback(ev); return; @@ -255,8 +263,10 @@ lnet_eq_dequeue_event(lnet_eq_t *eq, lnet_event_t *ev) if (eq->eq_deq_seq == new_event->sequence) { rc = 1; } else { - /* don't complain with CERROR: some EQs are sized small - * anyway; if it's important, the caller should complain */ + /* + * don't complain with CERROR: some EQs are sized small + * anyway; if it's important, the caller should complain + */ CDEBUG(D_NET, "Event Queue Overflow: eq seq %lu ev seq %lu\n", eq->eq_deq_seq, new_event->sequence); rc = -EOVERFLOW; @@ -309,8 +319,8 @@ 
__must_hold(&the_lnet.ln_eq_wait_lock) wait_queue_t wl; unsigned long now; - if (tms == 0) - return -1; /* don't want to wait and no new event */ + if (!tms) + return -ENXIO; /* don't want to wait and no new event */ init_waitqueue_entry(&wl, current); set_current_state(TASK_INTERRUPTIBLE); @@ -320,7 +330,6 @@ __must_hold(&the_lnet.ln_eq_wait_lock) if (tms < 0) { schedule(); - } else { now = jiffies; schedule_timeout(msecs_to_jiffies(tms)); @@ -329,7 +338,7 @@ __must_hold(&the_lnet.ln_eq_wait_lock) tms = 0; } - wait = tms != 0; /* might need to call here again */ + wait = tms; /* might need to call here again */ *timeout_ms = tms; lnet_eq_wait_lock(); @@ -372,7 +381,6 @@ LNetEQPoll(lnet_handle_eq_t *eventqs, int neq, int timeout_ms, int rc; int i; - LASSERT(the_lnet.ln_init); LASSERT(the_lnet.ln_refcount > 0); if (neq < 1) @@ -384,20 +392,20 @@ LNetEQPoll(lnet_handle_eq_t *eventqs, int neq, int timeout_ms, for (i = 0; i < neq; i++) { lnet_eq_t *eq = lnet_handle2eq(&eventqs[i]); - if (eq == NULL) { + if (!eq) { lnet_eq_wait_unlock(); return -ENOENT; } rc = lnet_eq_dequeue_event(eq, event); - if (rc != 0) { + if (rc) { lnet_eq_wait_unlock(); *which = i; return rc; } } - if (wait == 0) + if (!wait) break; /* diff --git a/drivers/staging/lustre/lnet/lnet/lib-md.c b/drivers/staging/lustre/lnet/lnet/lib-md.c index 758f5bede..75d31217b 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-md.c +++ b/drivers/staging/lustre/lnet/lnet/lib-md.c @@ -46,16 +46,18 @@ void lnet_md_unlink(lnet_libmd_t *md) { - if ((md->md_flags & LNET_MD_FLAG_ZOMBIE) == 0) { + if (!(md->md_flags & LNET_MD_FLAG_ZOMBIE)) { /* first unlink attempt... */ lnet_me_t *me = md->md_me; md->md_flags |= LNET_MD_FLAG_ZOMBIE; - /* Disassociate from ME (if any), + /* + * Disassociate from ME (if any), * and unlink it if it was created - * with LNET_UNLINK */ - if (me != NULL) { + * with LNET_UNLINK + */ + if (me) { /* detach MD from portal */ lnet_ptl_detach_md(me, md); if (me->me_unlink == LNET_UNLINK) @@ -66,14 +68,14 @@ lnet_md_unlink(lnet_libmd_t *md) lnet_res_lh_invalidate(&md->md_lh); } - if (md->md_refcount != 0) { + if (md->md_refcount) { CDEBUG(D_NET, "Queueing unlink of md %p\n", md); return; } CDEBUG(D_NET, "Unlinking md %p\n", md); - if (md->md_eq != NULL) { + if (md->md_eq) { int cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie); LASSERT(*md->md_eq->eq_refs[cpt] > 0); @@ -103,12 +105,12 @@ lnet_md_build(lnet_libmd_t *lmd, lnet_md_t *umd, int unlink) lmd->md_refcount = 0; lmd->md_flags = (unlink == LNET_UNLINK) ? 
LNET_MD_FLAG_AUTO_UNLINK : 0; - if ((umd->options & LNET_MD_IOVEC) != 0) { - - if ((umd->options & LNET_MD_KIOV) != 0) /* Can't specify both */ + if (umd->options & LNET_MD_IOVEC) { + if (umd->options & LNET_MD_KIOV) /* Can't specify both */ return -EINVAL; - lmd->md_niov = niov = umd->length; + niov = umd->length; + lmd->md_niov = umd->length; memcpy(lmd->md_iov.iov, umd->start, niov * sizeof(lmd->md_iov.iov[0])); @@ -123,20 +125,21 @@ lnet_md_build(lnet_libmd_t *lmd, lnet_md_t *umd, int unlink) lmd->md_length = total_length; - if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* use max size */ + if ((umd->options & LNET_MD_MAX_SIZE) && /* use max size */ (umd->max_size < 0 || umd->max_size > total_length)) /* illegal max_size */ return -EINVAL; - } else if ((umd->options & LNET_MD_KIOV) != 0) { - lmd->md_niov = niov = umd->length; + } else if (umd->options & LNET_MD_KIOV) { + niov = umd->length; + lmd->md_niov = umd->length; memcpy(lmd->md_iov.kiov, umd->start, niov * sizeof(lmd->md_iov.kiov[0])); for (i = 0; i < (int)niov; i++) { /* We take the page pointer on trust */ if (lmd->md_iov.kiov[i].kiov_offset + - lmd->md_iov.kiov[i].kiov_len > PAGE_CACHE_SIZE) + lmd->md_iov.kiov[i].kiov_len > PAGE_SIZE) return -EINVAL; /* invalid length */ total_length += lmd->md_iov.kiov[i].kiov_len; @@ -144,17 +147,18 @@ lnet_md_build(lnet_libmd_t *lmd, lnet_md_t *umd, int unlink) lmd->md_length = total_length; - if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */ + if ((umd->options & LNET_MD_MAX_SIZE) && /* max size used */ (umd->max_size < 0 || umd->max_size > total_length)) /* illegal max_size */ return -EINVAL; } else { /* contiguous */ lmd->md_length = umd->length; - lmd->md_niov = niov = 1; + niov = 1; + lmd->md_niov = 1; lmd->md_iov.iov[0].iov_base = umd->start; lmd->md_iov.iov[0].iov_len = umd->length; - if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */ + if ((umd->options & LNET_MD_MAX_SIZE) && /* max size used */ (umd->max_size < 0 || umd->max_size > (int)umd->length)) /* illegal max_size */ return -EINVAL; @@ -169,22 +173,26 @@ lnet_md_link(lnet_libmd_t *md, lnet_handle_eq_t eq_handle, int cpt) { struct lnet_res_container *container = the_lnet.ln_md_containers[cpt]; - /* NB we are passed an allocated, but inactive md. + /* + * NB we are passed an allocated, but inactive md. * if we return success, caller may lnet_md_unlink() it. * otherwise caller may only lnet_md_free() it. */ - /* This implementation doesn't know how to create START events or + /* + * This implementation doesn't know how to create START events or * disable END events. Best to LASSERT our caller is compliant so - * we find out quickly... */ - /* TODO - reevaluate what should be here in light of + * we find out quickly... + */ + /* + * TODO - reevaluate what should be here in light of * the removal of the start and end events * maybe there we shouldn't even allow LNET_EQ_NONE!) - * LASSERT (eq == NULL); + * LASSERT(!eq); */ if (!LNetHandleIsInvalid(eq_handle)) { md->md_eq = lnet_handle2eq(&eq_handle); - if (md->md_eq == NULL) + if (!md->md_eq) return -ENOENT; (*md->md_eq->eq_refs[cpt])++; @@ -208,8 +216,8 @@ lnet_md_deconstruct(lnet_libmd_t *lmd, lnet_md_t *umd) * and that's all. */ umd->start = lmd->md_start; - umd->length = ((lmd->md_options & - (LNET_MD_IOVEC | LNET_MD_KIOV)) == 0) ? + umd->length = !(lmd->md_options & + (LNET_MD_IOVEC | LNET_MD_KIOV)) ? 
lmd->md_length : lmd->md_niov; umd->threshold = lmd->md_threshold; umd->max_size = lmd->md_max_size; @@ -221,13 +229,13 @@ lnet_md_deconstruct(lnet_libmd_t *lmd, lnet_md_t *umd) static int lnet_md_validate(lnet_md_t *umd) { - if (umd->start == NULL && umd->length != 0) { + if (!umd->start && umd->length) { CERROR("MD start pointer can not be NULL with length %u\n", umd->length); return -EINVAL; } - if ((umd->options & (LNET_MD_KIOV | LNET_MD_IOVEC)) != 0 && + if ((umd->options & (LNET_MD_KIOV | LNET_MD_IOVEC)) && umd->length > LNET_MAX_IOV) { CERROR("Invalid option: too many fragments %u, %d max\n", umd->length, LNET_MAX_IOV); @@ -273,41 +281,42 @@ LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd, int cpt; int rc; - LASSERT(the_lnet.ln_init); LASSERT(the_lnet.ln_refcount > 0); - if (lnet_md_validate(&umd) != 0) + if (lnet_md_validate(&umd)) return -EINVAL; - if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT)) == 0) { + if (!(umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT))) { CERROR("Invalid option: no MD_OP set\n"); return -EINVAL; } md = lnet_md_alloc(&umd); - if (md == NULL) + if (!md) return -ENOMEM; rc = lnet_md_build(md, &umd, unlink); cpt = lnet_cpt_of_cookie(meh.cookie); lnet_res_lock(cpt); - if (rc != 0) + if (rc) goto failed; me = lnet_handle2me(&meh); - if (me == NULL) + if (!me) rc = -ENOENT; - else if (me->me_md != NULL) + else if (me->me_md) rc = -EBUSY; else rc = lnet_md_link(md, umd.eq_handle, cpt); - if (rc != 0) + if (rc) goto failed; - /* attach this MD to portal of ME and check if it matches any - * blocked msgs on this portal */ + /* + * attach this MD to portal of ME and check if it matches any + * blocked msgs on this portal + */ lnet_ptl_attach_md(me, md, &matches, &drops); lnet_md2handle(handle, md); @@ -350,29 +359,28 @@ LNetMDBind(lnet_md_t umd, lnet_unlink_t unlink, lnet_handle_md_t *handle) int cpt; int rc; - LASSERT(the_lnet.ln_init); LASSERT(the_lnet.ln_refcount > 0); - if (lnet_md_validate(&umd) != 0) + if (lnet_md_validate(&umd)) return -EINVAL; - if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT)) != 0) { + if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT))) { CERROR("Invalid option: GET|PUT illegal on active MDs\n"); return -EINVAL; } md = lnet_md_alloc(&umd); - if (md == NULL) + if (!md) return -ENOMEM; rc = lnet_md_build(md, &umd, unlink); cpt = lnet_res_lock_current(); - if (rc != 0) + if (rc) goto failed; rc = lnet_md_link(md, umd.eq_handle, cpt); - if (rc != 0) + if (rc) goto failed; lnet_md2handle(handle, md); @@ -425,23 +433,24 @@ LNetMDUnlink(lnet_handle_md_t mdh) lnet_libmd_t *md; int cpt; - LASSERT(the_lnet.ln_init); LASSERT(the_lnet.ln_refcount > 0); cpt = lnet_cpt_of_cookie(mdh.cookie); lnet_res_lock(cpt); md = lnet_handle2md(&mdh); - if (md == NULL) { + if (!md) { lnet_res_unlock(cpt); return -ENOENT; } md->md_flags |= LNET_MD_FLAG_ABORTED; - /* If the MD is busy, lnet_md_unlink just marks it for deletion, and + /* + * If the MD is busy, lnet_md_unlink just marks it for deletion, and * when the LND is done, the completion event flags that the MD was - * unlinked. Otherwise, we enqueue an event now... */ - if (md->md_eq != NULL && md->md_refcount == 0) { + * unlinked. Otherwise, we enqueue an event now... 
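
/*
 * Caller-side pattern implied by this comment: LNetMDUnlink() may return
 * while the MD is still busy, so a caller that must know when the MD is
 * gone polls for an event with ev.unlinked set, exactly as the removed
 * lnet_ping_target_fini() did. A condensed sketch, assuming valid mdh/eqh
 * handles and an lnet_event_t 'event':
 */
LNetMDUnlink(mdh);		/* only *starts* the unlink if the MD is busy */
for (;;) {
	rc = LNetEQPoll(&eqh, 1, timeout_ms, &event, &which);
	if (!rc)
		continue;	/* timed out; the unlink is still pending */
	if (rc < 0 && rc != -EOVERFLOW)
		break;		/* unexpected failure */
	if (event.unlinked)
		break;		/* MD is fully unlinked now */
}
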
+ */ + if (md->md_eq && !md->md_refcount) { lnet_build_unlink_event(md, &ev); lnet_eq_enqueue_event(md->md_eq, &ev); } diff --git a/drivers/staging/lustre/lnet/lnet/lib-me.c b/drivers/staging/lustre/lnet/lnet/lib-me.c index 42fc99ef9..e671aed37 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-me.c +++ b/drivers/staging/lustre/lnet/lnet/lib-me.c @@ -83,7 +83,6 @@ LNetMEAttach(unsigned int portal, struct lnet_me *me; struct list_head *head; - LASSERT(the_lnet.ln_init); LASSERT(the_lnet.ln_refcount > 0); if ((int)portal >= the_lnet.ln_nportals) @@ -91,11 +90,11 @@ LNetMEAttach(unsigned int portal, mtable = lnet_mt_of_attach(portal, match_id, match_bits, ignore_bits, pos); - if (mtable == NULL) /* can't match portal type */ + if (!mtable) /* can't match portal type */ return -EPERM; me = lnet_me_alloc(); - if (me == NULL) + if (!me) return -ENOMEM; lnet_res_lock(mtable->mt_cpt); @@ -109,7 +108,7 @@ LNetMEAttach(unsigned int portal, lnet_res_lh_initialize(the_lnet.ln_me_containers[mtable->mt_cpt], &me->me_lh); - if (ignore_bits != 0) + if (ignore_bits) head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE]; else head = lnet_mt_match_head(mtable, match_id, match_bits); @@ -156,14 +155,13 @@ LNetMEInsert(lnet_handle_me_t current_meh, struct lnet_portal *ptl; int cpt; - LASSERT(the_lnet.ln_init); LASSERT(the_lnet.ln_refcount > 0); if (pos == LNET_INS_LOCAL) return -EPERM; new_me = lnet_me_alloc(); - if (new_me == NULL) + if (!new_me) return -ENOMEM; cpt = lnet_cpt_of_cookie(current_meh.cookie); @@ -171,7 +169,7 @@ LNetMEInsert(lnet_handle_me_t current_meh, lnet_res_lock(cpt); current_me = lnet_handle2me(¤t_meh); - if (current_me == NULL) { + if (!current_me) { lnet_me_free(new_me); lnet_res_unlock(cpt); @@ -233,22 +231,21 @@ LNetMEUnlink(lnet_handle_me_t meh) lnet_event_t ev; int cpt; - LASSERT(the_lnet.ln_init); LASSERT(the_lnet.ln_refcount > 0); cpt = lnet_cpt_of_cookie(meh.cookie); lnet_res_lock(cpt); me = lnet_handle2me(&meh); - if (me == NULL) { + if (!me) { lnet_res_unlock(cpt); return -ENOENT; } md = me->me_md; - if (md != NULL) { + if (md) { md->md_flags |= LNET_MD_FLAG_ABORTED; - if (md->md_eq != NULL && md->md_refcount == 0) { + if (md->md_eq && !md->md_refcount) { lnet_build_unlink_event(md, &ev); lnet_eq_enqueue_event(md->md_eq, &ev); } @@ -267,7 +264,7 @@ lnet_me_unlink(lnet_me_t *me) { list_del(&me->me_list); - if (me->me_md != NULL) { + if (me->me_md) { lnet_libmd_t *md = me->me_md; /* detach MD from portal of this ME */ diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c index fb8f7be04..f19aa9320 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-move.c +++ b/drivers/staging/lustre/lnet/lnet/lib-move.c @@ -50,17 +50,16 @@ int lnet_fail_nid(lnet_nid_t nid, unsigned int threshold) { lnet_test_peer_t *tp; + lnet_test_peer_t *temp; struct list_head *el; struct list_head *next; struct list_head cull; - LASSERT(the_lnet.ln_init); - /* NB: use lnet_net_lock(0) to serialize operations on test peers */ - if (threshold != 0) { + if (threshold) { /* Adding a new entry */ LIBCFS_ALLOC(tp, sizeof(*tp)); - if (tp == NULL) + if (!tp) return -ENOMEM; tp->tp_nid = nid; @@ -80,7 +79,7 @@ lnet_fail_nid(lnet_nid_t nid, unsigned int threshold) list_for_each_safe(el, next, &the_lnet.ln_test_peers) { tp = list_entry(el, lnet_test_peer_t, tp_list); - if (tp->tp_threshold == 0 || /* needs culling anyway */ + if (!tp->tp_threshold || /* needs culling anyway */ nid == LNET_NID_ANY || /* removing all entries */ tp->tp_nid == nid) { /* matched this one */ 
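
/*
 * Usage sketch for this fault-injection hook (values illustrative): a
 * nonzero threshold arms dropping for that many messages to the peer,
 * LNET_MD_THRESH_INF makes the drop permanent, and a zero threshold culls
 * matching entries, with LNET_NID_ANY clearing the whole table:
 */
rc = lnet_fail_nid(nid, 5);			/* drop the next 5 messages */
rc = lnet_fail_nid(nid, LNET_MD_THRESH_INF);	/* drop unconditionally */
rc = lnet_fail_nid(LNET_NID_ANY, 0);		/* remove every test peer */
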
list_del(&tp->tp_list); @@ -90,9 +89,7 @@ lnet_fail_nid(lnet_nid_t nid, unsigned int threshold) lnet_net_unlock(0); - while (!list_empty(&cull)) { - tp = list_entry(cull.next, lnet_test_peer_t, tp_list); - + list_for_each_entry_safe(tp, temp, &cull, tp_list) { list_del(&tp->tp_list); LIBCFS_FREE(tp, sizeof(*tp)); } @@ -103,6 +100,7 @@ static int fail_peer(lnet_nid_t nid, int outgoing) { lnet_test_peer_t *tp; + lnet_test_peer_t *temp; struct list_head *el; struct list_head *next; struct list_head cull; @@ -116,12 +114,14 @@ fail_peer(lnet_nid_t nid, int outgoing) list_for_each_safe(el, next, &the_lnet.ln_test_peers) { tp = list_entry(el, lnet_test_peer_t, tp_list); - if (tp->tp_threshold == 0) { + if (!tp->tp_threshold) { /* zombie entry */ if (outgoing) { - /* only cull zombies on outgoing tests, + /* + * only cull zombies on outgoing tests, * since we may be at interrupt priority on - * incoming messages. */ + * incoming messages. + */ list_del(&tp->tp_list); list_add(&tp->tp_list, &cull); } @@ -135,7 +135,7 @@ fail_peer(lnet_nid_t nid, int outgoing) if (tp->tp_threshold != LNET_MD_THRESH_INF) { tp->tp_threshold--; if (outgoing && - tp->tp_threshold == 0) { + !tp->tp_threshold) { /* see above */ list_del(&tp->tp_list); list_add(&tp->tp_list, &cull); @@ -147,8 +147,7 @@ fail_peer(lnet_nid_t nid, int outgoing) lnet_net_unlock(0); - while (!list_empty(&cull)) { - tp = list_entry(cull.next, lnet_test_peer_t, tp_list); + list_for_each_entry_safe(tp, temp, &cull, tp_list) { list_del(&tp->tp_list); LIBCFS_FREE(tp, sizeof(*tp)); @@ -162,6 +161,7 @@ lnet_iov_nob(unsigned int niov, struct kvec *iov) { unsigned int nob = 0; + LASSERT(!niov || iov); while (niov-- > 0) nob += (iov++)->iov_len; @@ -171,13 +171,13 @@ EXPORT_SYMBOL(lnet_iov_nob); void lnet_copy_iov2iov(unsigned int ndiov, struct kvec *diov, unsigned int doffset, - unsigned int nsiov, struct kvec *siov, unsigned int soffset, - unsigned int nob) + unsigned int nsiov, struct kvec *siov, unsigned int soffset, + unsigned int nob) { /* NB diov, siov are READ-ONLY */ unsigned int this_nob; - if (nob == 0) + if (!nob) return; /* skip complete frags before 'doffset' */ @@ -206,7 +206,7 @@ lnet_copy_iov2iov(unsigned int ndiov, struct kvec *diov, unsigned int doffset, this_nob = min(this_nob, nob); memcpy((char *)diov->iov_base + doffset, - (char *)siov->iov_base + soffset, this_nob); + (char *)siov->iov_base + soffset, this_nob); nob -= this_nob; if (diov->iov_len > doffset + this_nob) { @@ -230,16 +230,18 @@ EXPORT_SYMBOL(lnet_copy_iov2iov); int lnet_extract_iov(int dst_niov, struct kvec *dst, - int src_niov, struct kvec *src, - unsigned int offset, unsigned int len) + int src_niov, struct kvec *src, + unsigned int offset, unsigned int len) { - /* Initialise 'dst' to the subset of 'src' starting at 'offset', + /* + * Initialise 'dst' to the subset of 'src' starting at 'offset', * for exactly 'len' bytes, and return the number of entries. 
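
/*
 * The fragment-walking idiom these iov helpers share, in isolation:
 * consume whole fragments until 'offset' falls inside one, then operate
 * from that point. A sketch over struct kvec, assuming the caller has
 * already checked that offset lies within the vector:
 */
while (offset >= iov->iov_len) {	/* skip complete fragments */
	offset -= iov->iov_len;
	iov++;
	niov--;
	LASSERT(niov > 0);		/* offset must not run off the end */
}
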
- * NB not destructive to 'src' */ + * NB not destructive to 'src' + */ unsigned int frag_len; unsigned int niov; - if (len == 0) /* no data => */ + if (!len) /* no data => */ return 0; /* no frags */ LASSERT(src_niov > 0); @@ -280,6 +282,7 @@ lnet_kiov_nob(unsigned int niov, lnet_kiov_t *kiov) { unsigned int nob = 0; + LASSERT(!niov || kiov); while (niov-- > 0) nob += (kiov++)->kiov_len; @@ -297,7 +300,7 @@ lnet_copy_kiov2kiov(unsigned int ndiov, lnet_kiov_t *diov, unsigned int doffset, char *daddr = NULL; char *saddr = NULL; - if (nob == 0) + if (!nob) return; LASSERT(!in_interrupt()); @@ -325,17 +328,18 @@ lnet_copy_kiov2kiov(unsigned int ndiov, lnet_kiov_t *diov, unsigned int doffset, siov->kiov_len - soffset); this_nob = min(this_nob, nob); - if (daddr == NULL) + if (!daddr) daddr = ((char *)kmap(diov->kiov_page)) + diov->kiov_offset + doffset; - if (saddr == NULL) + if (!saddr) saddr = ((char *)kmap(siov->kiov_page)) + siov->kiov_offset + soffset; - /* Vanishing risk of kmap deadlock when mapping 2 pages. + /* + * Vanishing risk of kmap deadlock when mapping 2 pages. * However in practice at least one of the kiovs will be mapped - * kernel pages and the map/unmap will be NOOPs */ - + * kernel pages and the map/unmap will be NOOPs + */ memcpy(daddr, saddr, this_nob); nob -= this_nob; @@ -362,9 +366,9 @@ lnet_copy_kiov2kiov(unsigned int ndiov, lnet_kiov_t *diov, unsigned int doffset, } } while (nob > 0); - if (daddr != NULL) + if (daddr) kunmap(diov->kiov_page); - if (saddr != NULL) + if (saddr) kunmap(siov->kiov_page); } EXPORT_SYMBOL(lnet_copy_kiov2kiov); @@ -378,7 +382,7 @@ lnet_copy_kiov2iov(unsigned int niov, struct kvec *iov, unsigned int iovoffset, unsigned int this_nob; char *addr = NULL; - if (nob == 0) + if (!nob) return; LASSERT(!in_interrupt()); @@ -406,7 +410,7 @@ lnet_copy_kiov2iov(unsigned int niov, struct kvec *iov, unsigned int iovoffset, (__kernel_size_t) kiov->kiov_len - kiovoffset); this_nob = min(this_nob, nob); - if (addr == NULL) + if (!addr) addr = ((char *)kmap(kiov->kiov_page)) + kiov->kiov_offset + kiovoffset; @@ -434,7 +438,7 @@ lnet_copy_kiov2iov(unsigned int niov, struct kvec *iov, unsigned int iovoffset, } while (nob > 0); - if (addr != NULL) + if (addr) kunmap(kiov->kiov_page); } EXPORT_SYMBOL(lnet_copy_kiov2iov); @@ -449,7 +453,7 @@ lnet_copy_iov2kiov(unsigned int nkiov, lnet_kiov_t *kiov, unsigned int this_nob; char *addr = NULL; - if (nob == 0) + if (!nob) return; LASSERT(!in_interrupt()); @@ -477,7 +481,7 @@ lnet_copy_iov2kiov(unsigned int nkiov, lnet_kiov_t *kiov, iov->iov_len - iovoffset); this_nob = min(this_nob, nob); - if (addr == NULL) + if (!addr) addr = ((char *)kmap(kiov->kiov_page)) + kiov->kiov_offset + kiovoffset; @@ -504,23 +508,25 @@ lnet_copy_iov2kiov(unsigned int nkiov, lnet_kiov_t *kiov, } } while (nob > 0); - if (addr != NULL) + if (addr) kunmap(kiov->kiov_page); } EXPORT_SYMBOL(lnet_copy_iov2kiov); int lnet_extract_kiov(int dst_niov, lnet_kiov_t *dst, - int src_niov, lnet_kiov_t *src, - unsigned int offset, unsigned int len) + int src_niov, lnet_kiov_t *src, + unsigned int offset, unsigned int len) { - /* Initialise 'dst' to the subset of 'src' starting at 'offset', + /* + * Initialise 'dst' to the subset of 'src' starting at 'offset', * for exactly 'len' bytes, and return the number of entries. 
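
/*
 * The kmap() discipline used by the kiov copy routines above: map pages
 * lazily, copy the overlapping run, and unmap a page only once it is
 * consumed, so at most two pages are ever mapped at a time. The core of
 * one iteration, with the bounds arithmetic elided:
 */
daddr = (char *)kmap(diov->kiov_page) + diov->kiov_offset + doffset;
saddr = (char *)kmap(siov->kiov_page) + siov->kiov_offset + soffset;
memcpy(daddr, saddr, this_nob);	/* copy the overlapping run */
kunmap(siov->kiov_page);	/* no-op when the page is a lowmem kernel page */
kunmap(diov->kiov_page);
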
- * NB not destructive to 'src' */ + * NB not destructive to 'src' + */ unsigned int frag_len; unsigned int niov; - if (len == 0) /* no data => */ + if (!len) /* no data => */ return 0; /* no frags */ LASSERT(src_niov > 0); @@ -543,12 +549,12 @@ lnet_extract_kiov(int dst_niov, lnet_kiov_t *dst, if (len <= frag_len) { dst->kiov_len = len; LASSERT(dst->kiov_offset + dst->kiov_len - <= PAGE_CACHE_SIZE); + <= PAGE_SIZE); return niov; } dst->kiov_len = frag_len; - LASSERT(dst->kiov_offset + dst->kiov_len <= PAGE_CACHE_SIZE); + LASSERT(dst->kiov_offset + dst->kiov_len <= PAGE_SIZE); len -= frag_len; dst++; @@ -560,7 +566,7 @@ lnet_extract_kiov(int dst_niov, lnet_kiov_t *dst, } EXPORT_SYMBOL(lnet_extract_kiov); -static void +void lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, unsigned int offset, unsigned int mlen, unsigned int rlen) { @@ -570,9 +576,9 @@ lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, int rc; LASSERT(!in_interrupt()); - LASSERT(mlen == 0 || msg != NULL); + LASSERT(!mlen || msg); - if (msg != NULL) { + if (msg) { LASSERT(msg->msg_receiving); LASSERT(!msg->msg_sending); LASSERT(rlen == msg->msg_len); @@ -582,18 +588,18 @@ lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, msg->msg_receiving = 0; - if (mlen != 0) { + if (mlen) { niov = msg->msg_niov; iov = msg->msg_iov; kiov = msg->msg_kiov; LASSERT(niov > 0); - LASSERT((iov == NULL) != (kiov == NULL)); + LASSERT(!iov != !kiov); } } - rc = (ni->ni_lnd->lnd_recv)(ni, private, msg, delayed, - niov, iov, kiov, offset, mlen, rlen); + rc = ni->ni_lnd->lnd_recv(ni, private, msg, delayed, + niov, iov, kiov, offset, mlen, rlen); if (rc < 0) lnet_finalize(ni, msg, rc); } @@ -605,13 +611,13 @@ lnet_setpayloadbuffer(lnet_msg_t *msg) LASSERT(msg->msg_len > 0); LASSERT(!msg->msg_routing); - LASSERT(md != NULL); - LASSERT(msg->msg_niov == 0); - LASSERT(msg->msg_iov == NULL); - LASSERT(msg->msg_kiov == NULL); + LASSERT(md); + LASSERT(!msg->msg_niov); + LASSERT(!msg->msg_iov); + LASSERT(!msg->msg_kiov); msg->msg_niov = md->md_niov; - if ((md->md_options & LNET_MD_KIOV) != 0) + if (md->md_options & LNET_MD_KIOV) msg->msg_kiov = md->md_iov.kiov; else msg->msg_iov = md->md_iov.iov; @@ -626,7 +632,7 @@ lnet_prep_send(lnet_msg_t *msg, int type, lnet_process_id_t target, msg->msg_len = len; msg->msg_offset = offset; - if (len != 0) + if (len) lnet_setpayloadbuffer(msg); memset(&msg->msg_hdr, 0, sizeof(msg->msg_hdr)); @@ -646,9 +652,9 @@ lnet_ni_send(lnet_ni_t *ni, lnet_msg_t *msg) LASSERT(!in_interrupt()); LASSERT(LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND || - (msg->msg_txcredit && msg->msg_peertxcredit)); + (msg->msg_txcredit && msg->msg_peertxcredit)); - rc = (ni->ni_lnd->lnd_send)(ni, priv, msg); + rc = ni->ni_lnd->lnd_send(ni, priv, msg); if (rc < 0) lnet_finalize(ni, msg, rc); } @@ -661,12 +667,12 @@ lnet_ni_eager_recv(lnet_ni_t *ni, lnet_msg_t *msg) LASSERT(!msg->msg_sending); LASSERT(msg->msg_receiving); LASSERT(!msg->msg_rx_ready_delay); - LASSERT(ni->ni_lnd->lnd_eager_recv != NULL); + LASSERT(ni->ni_lnd->lnd_eager_recv); msg->msg_rx_ready_delay = 1; - rc = (ni->ni_lnd->lnd_eager_recv)(ni, msg->msg_private, msg, - &msg->msg_private); - if (rc != 0) { + rc = ni->ni_lnd->lnd_eager_recv(ni, msg->msg_private, msg, + &msg->msg_private); + if (rc) { CERROR("recv from %s / send to %s aborted: eager_recv failed %d\n", libcfs_nid2str(msg->msg_rxpeer->lp_nid), libcfs_id2str(msg->msg_target), rc); @@ -683,15 +689,15 @@ lnet_ni_query_locked(lnet_ni_t *ni, lnet_peer_t *lp) unsigned 
long last_alive = 0; LASSERT(lnet_peer_aliveness_enabled(lp)); - LASSERT(ni->ni_lnd->lnd_query != NULL); + LASSERT(ni->ni_lnd->lnd_query); lnet_net_unlock(lp->lp_cpt); - (ni->ni_lnd->lnd_query)(ni, lp->lp_nid, &last_alive); + ni->ni_lnd->lnd_query(ni, lp->lp_nid, &last_alive); lnet_net_lock(lp->lp_cpt); lp->lp_last_query = cfs_time_current(); - if (last_alive != 0) /* NI has updated timestamp */ + if (last_alive) /* NI has updated timestamp */ lp->lp_last_alive = last_alive; } @@ -720,14 +726,16 @@ lnet_peer_is_alive(lnet_peer_t *lp, unsigned long now) * case, and moreover lp_last_alive at peer creation is assumed. */ if (alive && !lp->lp_alive && - !(lnet_isrouter(lp) && lp->lp_alive_count == 0)) + !(lnet_isrouter(lp) && !lp->lp_alive_count)) lnet_notify_locked(lp, 0, 1, lp->lp_last_alive); return alive; } -/* NB: returns 1 when alive, 0 when dead, negative when error; - * may drop the lnet_net_lock */ +/* + * NB: returns 1 when alive, 0 when dead, negative when error; + * may drop the lnet_net_lock + */ static int lnet_peer_alive_locked(lnet_peer_t *lp) { @@ -739,9 +747,11 @@ lnet_peer_alive_locked(lnet_peer_t *lp) if (lnet_peer_is_alive(lp, now)) return 1; - /* Peer appears dead, but we should avoid frequent NI queries (at - * most once per lnet_queryinterval seconds). */ - if (lp->lp_last_query != 0) { + /* + * Peer appears dead, but we should avoid frequent NI queries (at + * most once per lnet_queryinterval seconds). + */ + if (lp->lp_last_query) { static const int lnet_queryinterval = 1; unsigned long next_query = @@ -775,10 +785,10 @@ lnet_peer_alive_locked(lnet_peer_t *lp) * lnet_send() is going to lnet_net_unlock immediately after this, so * it sets do_send FALSE and I don't do the unlock/send/lock bit. * - * \retval 0 If \a msg sent or OK to send. - * \retval EAGAIN If \a msg blocked for credit. - * \retval EHOSTUNREACH If the next hop of the message appears dead. - * \retval ECANCELED If the MD of the message has been unlinked. + * \retval LNET_CREDIT_OK If \a msg sent or OK to send. + * \retval LNET_CREDIT_WAIT If \a msg blocked for credit. + * \retval -EHOSTUNREACH If the next hop of the message appears dead. + * \retval -ECANCELED If the MD of the message has been unlinked. 
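
/*
 * How a caller dispatches on these return values after the change from
 * positive errno-style returns; a sketch with the surrounding send path
 * elided:
 */
rc = lnet_post_send_locked(msg, 0);
switch (rc) {
case LNET_CREDIT_OK:		/* sent, or OK to send immediately */
	break;
case LNET_CREDIT_WAIT:		/* parked on a credit queue */
	break;
default:			/* -EHOSTUNREACH or -ECANCELED: the */
	break;			/* message has already been finalized */
}
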
*/ static int lnet_post_send_locked(lnet_msg_t *msg, int do_send) @@ -794,8 +804,8 @@ lnet_post_send_locked(lnet_msg_t *msg, int do_send) LASSERT(msg->msg_tx_committed); /* NB 'lp' is always the next hop */ - if ((msg->msg_target.pid & LNET_PID_USERFLAG) == 0 && - lnet_peer_alive_locked(lp) == 0) { + if (!(msg->msg_target.pid & LNET_PID_USERFLAG) && + !lnet_peer_alive_locked(lp)) { the_lnet.ln_counters[cpt]->drop_count++; the_lnet.ln_counters[cpt]->drop_length += msg->msg_len; lnet_net_unlock(cpt); @@ -806,11 +816,11 @@ lnet_post_send_locked(lnet_msg_t *msg, int do_send) lnet_finalize(ni, msg, -EHOSTUNREACH); lnet_net_lock(cpt); - return EHOSTUNREACH; + return -EHOSTUNREACH; } - if (msg->msg_md != NULL && - (msg->msg_md->md_flags & LNET_MD_FLAG_ABORTED) != 0) { + if (msg->msg_md && + (msg->msg_md->md_flags & LNET_MD_FLAG_ABORTED)) { lnet_net_unlock(cpt); CNETERR("Aborting message for %s: LNetM[DE]Unlink() already called on the MD/ME.\n", @@ -819,12 +829,12 @@ lnet_post_send_locked(lnet_msg_t *msg, int do_send) lnet_finalize(ni, msg, -ECANCELED); lnet_net_lock(cpt); - return ECANCELED; + return -ECANCELED; } if (!msg->msg_peertxcredit) { LASSERT((lp->lp_txcredits < 0) == - !list_empty(&lp->lp_txq)); + !list_empty(&lp->lp_txq)); msg->msg_peertxcredit = 1; lp->lp_txqnob += msg->msg_len + sizeof(lnet_hdr_t); @@ -836,7 +846,7 @@ lnet_post_send_locked(lnet_msg_t *msg, int do_send) if (lp->lp_txcredits < 0) { msg->msg_tx_delayed = 1; list_add_tail(&msg->msg_list, &lp->lp_txq); - return EAGAIN; + return LNET_CREDIT_WAIT; } } @@ -853,7 +863,7 @@ lnet_post_send_locked(lnet_msg_t *msg, int do_send) if (tq->tq_credits < 0) { msg->msg_tx_delayed = 1; list_add_tail(&msg->msg_list, &tq->tq_delayed); - return EAGAIN; + return LNET_CREDIT_WAIT; } } @@ -862,7 +872,7 @@ lnet_post_send_locked(lnet_msg_t *msg, int do_send) lnet_ni_send(ni, msg); lnet_net_lock(cpt); } - return 0; + return LNET_CREDIT_OK; } static lnet_rtrbufpool_t * @@ -877,7 +887,7 @@ lnet_msg2bufpool(lnet_msg_t *msg) rbp = &the_lnet.ln_rtrpools[cpt][0]; LASSERT(msg->msg_len <= LNET_MTU); - while (msg->msg_len > (unsigned int)rbp->rbp_npages * PAGE_CACHE_SIZE) { + while (msg->msg_len > (unsigned int)rbp->rbp_npages * PAGE_SIZE) { rbp++; LASSERT(rbp < &the_lnet.ln_rtrpools[cpt][LNET_NRBPOOLS]); } @@ -888,16 +898,19 @@ lnet_msg2bufpool(lnet_msg_t *msg) static int lnet_post_routed_recv_locked(lnet_msg_t *msg, int do_recv) { - /* lnet_parse is going to lnet_net_unlock immediately after this, so it - * sets do_recv FALSE and I don't do the unlock/send/lock bit. I - * return EAGAIN if msg blocked and 0 if received or OK to receive */ + /* + * lnet_parse is going to lnet_net_unlock immediately after this, so it + * sets do_recv FALSE and I don't do the unlock/send/lock bit. 
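 *
 * Two credit gates apply below, in order (a summary of this function
 * only, not a change in behaviour):
 *
 *	if (--lp->lp_rtrcredits < 0)	queue msg on lp->lp_rtrq
 *	if (--rbp->rbp_credits < 0)	queue msg on rbp->rbp_msgs
 *	otherwise			attach a router buffer, lnd_recv()
 *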
+ * I return LNET_CREDIT_WAIT if msg blocked and LNET_CREDIT_OK if + * received or OK to receive + */ lnet_peer_t *lp = msg->msg_rxpeer; lnet_rtrbufpool_t *rbp; lnet_rtrbuf_t *rb; - LASSERT(msg->msg_iov == NULL); - LASSERT(msg->msg_kiov == NULL); - LASSERT(msg->msg_niov == 0); + LASSERT(!msg->msg_iov); + LASSERT(!msg->msg_kiov); + LASSERT(!msg->msg_niov); LASSERT(msg->msg_routing); LASSERT(msg->msg_receiving); LASSERT(!msg->msg_sending); @@ -907,7 +920,7 @@ lnet_post_routed_recv_locked(lnet_msg_t *msg, int do_recv) if (!msg->msg_peerrtrcredit) { LASSERT((lp->lp_rtrcredits < 0) == - !list_empty(&lp->lp_rtrq)); + !list_empty(&lp->lp_rtrq)); msg->msg_peerrtrcredit = 1; lp->lp_rtrcredits--; @@ -919,16 +932,13 @@ lnet_post_routed_recv_locked(lnet_msg_t *msg, int do_recv) LASSERT(msg->msg_rx_ready_delay); msg->msg_rx_delayed = 1; list_add_tail(&msg->msg_list, &lp->lp_rtrq); - return EAGAIN; + return LNET_CREDIT_WAIT; } } rbp = lnet_msg2bufpool(msg); if (!msg->msg_rtrcredit) { - LASSERT((rbp->rbp_credits < 0) == - !list_empty(&rbp->rbp_msgs)); - msg->msg_rtrcredit = 1; rbp->rbp_credits--; if (rbp->rbp_credits < rbp->rbp_mincredits) @@ -939,7 +949,7 @@ lnet_post_routed_recv_locked(lnet_msg_t *msg, int do_recv) LASSERT(msg->msg_rx_ready_delay); msg->msg_rx_delayed = 1; list_add_tail(&msg->msg_list, &rbp->rbp_msgs); - return EAGAIN; + return LNET_CREDIT_WAIT; } } @@ -958,7 +968,7 @@ lnet_post_routed_recv_locked(lnet_msg_t *msg, int do_recv) 0, msg->msg_len, msg->msg_len); lnet_net_lock(cpt); } - return 0; + return LNET_CREDIT_OK; } void @@ -980,7 +990,7 @@ lnet_return_tx_credits_locked(lnet_msg_t *msg) tq->tq_credits++; if (tq->tq_credits <= 0) { msg2 = list_entry(tq->tq_delayed.next, - lnet_msg_t, msg_list); + lnet_msg_t, msg_list); list_del(&msg2->msg_list); LASSERT(msg2->msg_txpeer->lp_ni == ni); @@ -1003,7 +1013,7 @@ lnet_return_tx_credits_locked(lnet_msg_t *msg) txpeer->lp_txcredits++; if (txpeer->lp_txcredits <= 0) { msg2 = list_entry(txpeer->lp_txq.next, - lnet_msg_t, msg_list); + lnet_msg_t, msg_list); list_del(&msg2->msg_list); LASSERT(msg2->msg_txpeer == txpeer); @@ -1013,13 +1023,50 @@ lnet_return_tx_credits_locked(lnet_msg_t *msg) } } - if (txpeer != NULL) { + if (txpeer) { msg->msg_txpeer = NULL; lnet_peer_decref_locked(txpeer); } } void +lnet_schedule_blocked_locked(lnet_rtrbufpool_t *rbp) +{ + lnet_msg_t *msg; + + if (list_empty(&rbp->rbp_msgs)) + return; + msg = list_entry(rbp->rbp_msgs.next, + lnet_msg_t, msg_list); + list_del(&msg->msg_list); + + (void)lnet_post_routed_recv_locked(msg, 1); +} + +void +lnet_drop_routed_msgs_locked(struct list_head *list, int cpt) +{ + struct list_head drop; + lnet_msg_t *msg; + lnet_msg_t *tmp; + + INIT_LIST_HEAD(&drop); + + list_splice_init(list, &drop); + + lnet_net_unlock(cpt); + + list_for_each_entry_safe(msg, tmp, &drop, msg_list) { + lnet_ni_recv(msg->msg_rxpeer->lp_ni, msg->msg_private, NULL, + 0, 0, 0, msg->msg_hdr.payload_length); + list_del_init(&msg->msg_list); + lnet_finalize(NULL, msg, -ECANCELED); + } + + lnet_net_lock(cpt); +} + +void lnet_return_rx_credits_locked(lnet_msg_t *msg) { lnet_peer_t *rxpeer = msg->msg_rxpeer; @@ -1030,34 +1077,51 @@ lnet_return_rx_credits_locked(lnet_msg_t *msg) lnet_rtrbuf_t *rb; lnet_rtrbufpool_t *rbp; - /* NB If a msg ever blocks for a buffer in rbp_msgs, it stays + /* + * NB If a msg ever blocks for a buffer in rbp_msgs, it stays * there until it gets one allocated, or aborts the wait - * itself */ - LASSERT(msg->msg_kiov != NULL); + * itself + */ + LASSERT(msg->msg_kiov); rb = 
list_entry(msg->msg_kiov, lnet_rtrbuf_t, rb_kiov[0]); rbp = rb->rb_pool; - LASSERT(rbp == lnet_msg2bufpool(msg)); msg->msg_kiov = NULL; msg->msg_rtrcredit = 0; - LASSERT((rbp->rbp_credits < 0) == - !list_empty(&rbp->rbp_msgs)); + LASSERT(rbp == lnet_msg2bufpool(msg)); + LASSERT((rbp->rbp_credits > 0) == !list_empty(&rbp->rbp_bufs)); - list_add(&rb->rb_list, &rbp->rbp_bufs); - rbp->rbp_credits++; - if (rbp->rbp_credits <= 0) { - msg2 = list_entry(rbp->rbp_msgs.next, - lnet_msg_t, msg_list); - list_del(&msg2->msg_list); + /* + * If routing is now turned off, we just drop this buffer and + * don't bother trying to return credits. + */ + if (!the_lnet.ln_routing) { + lnet_destroy_rtrbuf(rb, rbp->rbp_npages); + goto routing_off; + } - (void) lnet_post_routed_recv_locked(msg2, 1); + /* + * It is possible that a user has lowered the desired number of + * buffers in this pool. Make sure we never put back + * more buffers than the stated number. + */ + if (unlikely(rbp->rbp_credits >= rbp->rbp_req_nbuffers)) { + /* Discard this buffer so we don't have too many. */ + lnet_destroy_rtrbuf(rb, rbp->rbp_npages); + rbp->rbp_nbuffers--; + } else { + list_add(&rb->rb_list, &rbp->rbp_bufs); + rbp->rbp_credits++; + if (rbp->rbp_credits <= 0) + lnet_schedule_blocked_locked(rbp); } } +routing_off: if (msg->msg_peerrtrcredit) { /* give back peer router credits */ msg->msg_peerrtrcredit = 0; @@ -1066,15 +1130,22 @@ lnet_return_rx_credits_locked(lnet_msg_t *msg) !list_empty(&rxpeer->lp_rtrq)); rxpeer->lp_rtrcredits++; - if (rxpeer->lp_rtrcredits <= 0) { + /* + * drop all messages which are queued to be routed on that + * peer. + */ + if (!the_lnet.ln_routing) { + lnet_drop_routed_msgs_locked(&rxpeer->lp_rtrq, + msg->msg_rx_cpt); + } else if (rxpeer->lp_rtrcredits <= 0) { msg2 = list_entry(rxpeer->lp_rtrq.next, - lnet_msg_t, msg_list); + lnet_msg_t, msg_list); list_del(&msg2->msg_list); (void) lnet_post_routed_recv_locked(msg2, 1); } } - if (rxpeer != NULL) { + if (rxpeer) { msg->msg_rxpeer = NULL; lnet_peer_decref_locked(rxpeer); } @@ -1085,94 +1156,99 @@ lnet_compare_routes(lnet_route_t *r1, lnet_route_t *r2) { lnet_peer_t *p1 = r1->lr_gateway; lnet_peer_t *p2 = r2->lr_gateway; + int r1_hops = (r1->lr_hops == LNET_UNDEFINED_HOPS) ? 1 : r1->lr_hops; + int r2_hops = (r2->lr_hops == LNET_UNDEFINED_HOPS) ? 
1 : r2->lr_hops; if (r1->lr_priority < r2->lr_priority) return 1; if (r1->lr_priority > r2->lr_priority) - return -1; + return -ERANGE; - if (r1->lr_hops < r2->lr_hops) + if (r1_hops < r2_hops) return 1; - if (r1->lr_hops > r2->lr_hops) - return -1; + if (r1_hops > r2_hops) + return -ERANGE; if (p1->lp_txqnob < p2->lp_txqnob) return 1; if (p1->lp_txqnob > p2->lp_txqnob) - return -1; + return -ERANGE; if (p1->lp_txcredits > p2->lp_txcredits) return 1; if (p1->lp_txcredits < p2->lp_txcredits) - return -1; + return -ERANGE; if (r1->lr_seq - r2->lr_seq <= 0) return 1; - return -1; + return -ERANGE; } static lnet_peer_t * lnet_find_route_locked(lnet_ni_t *ni, lnet_nid_t target, lnet_nid_t rtr_nid) { lnet_remotenet_t *rnet; - lnet_route_t *rtr; - lnet_route_t *rtr_best; - lnet_route_t *rtr_last; + lnet_route_t *route; + lnet_route_t *best_route; + lnet_route_t *last_route; struct lnet_peer *lp_best; struct lnet_peer *lp; int rc; - /* If @rtr_nid is not LNET_NID_ANY, return the gateway with - * rtr_nid nid, otherwise find the best gateway I can use */ - + /* + * If @rtr_nid is not LNET_NID_ANY, return the gateway with + * rtr_nid nid, otherwise find the best gateway I can use + */ rnet = lnet_find_net_locked(LNET_NIDNET(target)); - if (rnet == NULL) + if (!rnet) return NULL; lp_best = NULL; - rtr_best = rtr_last = NULL; - list_for_each_entry(rtr, &rnet->lrn_routes, lr_list) { - lp = rtr->lr_gateway; + best_route = NULL; + last_route = NULL; + list_for_each_entry(route, &rnet->lrn_routes, lr_list) { + lp = route->lr_gateway; - if (!lp->lp_alive || /* gateway is down */ - ((lp->lp_ping_feats & LNET_PING_FEAT_NI_STATUS) != 0 && - rtr->lr_downis != 0)) /* NI to target is down */ + if (!lnet_is_route_alive(route)) continue; - if (ni != NULL && lp->lp_ni != ni) + if (ni && lp->lp_ni != ni) continue; if (lp->lp_nid == rtr_nid) /* it's pre-determined router */ return lp; - if (lp_best == NULL) { - rtr_best = rtr_last = rtr; + if (!lp_best) { + best_route = route; + last_route = route; lp_best = lp; continue; } /* no protection on below fields, but it's harmless */ - if (rtr_last->lr_seq - rtr->lr_seq < 0) - rtr_last = rtr; + if (last_route->lr_seq - route->lr_seq < 0) + last_route = route; - rc = lnet_compare_routes(rtr, rtr_best); + rc = lnet_compare_routes(route, best_route); if (rc < 0) continue; - rtr_best = rtr; + best_route = route; lp_best = lp; } - /* set sequence number on the best router to the latest sequence + 1 + /* + * set sequence number on the best router to the latest sequence + 1 * so we can round-robin all routers, it's race and inaccurate but - * harmless and functional */ - if (rtr_best != NULL) - rtr_best->lr_seq = rtr_last->lr_seq + 1; + * harmless and functional + */ + if (best_route) + best_route->lr_seq = last_route->lr_seq + 1; return lp_best; } @@ -1187,11 +1263,13 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid) int cpt2; int rc; - /* NB: rtr_nid is set to LNET_NID_ANY for all current use-cases, + /* + * NB: rtr_nid is set to LNET_NID_ANY for all current use-cases, * but we might want to use pre-determined router for ACK/REPLY - * in the future */ - /* NB: ni != NULL == interface pre-determined (ACK/REPLY) */ - LASSERT(msg->msg_txpeer == NULL); + * in the future + */ + /* NB: ni == interface pre-determined (ACK/REPLY) */ + LASSERT(!msg->msg_txpeer); LASSERT(!msg->msg_sending); LASSERT(!msg->msg_target_is_router); LASSERT(!msg->msg_receiving); @@ -1212,7 +1290,7 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid) src_ni = NULL; } else { 
src_ni = lnet_nid2ni_locked(src_nid, cpt); - if (src_ni == NULL) { + if (!src_ni) { lnet_net_unlock(cpt); LCONSOLE_WARN("Can't send to %s: src %s is not a local nid\n", libcfs_nid2str(dst_nid), @@ -1225,8 +1303,8 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid) /* Is this for someone on a local network? */ local_ni = lnet_net2ni_locked(LNET_NIDNET(dst_nid), cpt); - if (local_ni != NULL) { - if (src_ni == NULL) { + if (local_ni) { + if (!src_ni) { src_ni = local_ni; src_nid = src_ni->ni_nid; } else if (src_ni == local_ni) { @@ -1261,7 +1339,7 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid) rc = lnet_nid2peer_locked(&lp, dst_nid, cpt); /* lp has ref on src_ni; lose mine */ lnet_ni_decref_locked(src_ni, cpt); - if (rc != 0) { + if (rc) { lnet_net_unlock(cpt); LCONSOLE_WARN("Error %d finding peer %s\n", rc, libcfs_nid2str(dst_nid)); @@ -1272,8 +1350,8 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid) } else { /* sending to a remote network */ lp = lnet_find_route_locked(src_ni, dst_nid, rtr_nid); - if (lp == NULL) { - if (src_ni != NULL) + if (!lp) { + if (src_ni) lnet_ni_decref_locked(src_ni, cpt); lnet_net_unlock(cpt); @@ -1283,14 +1361,16 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid) return -EHOSTUNREACH; } - /* rtr_nid is LNET_NID_ANY or NID of pre-determined router, + /* + * rtr_nid is LNET_NID_ANY or NID of pre-determined router, * it's possible that rtr_nid isn't LNET_NID_ANY and lp isn't * pre-determined router, this can happen if router table - * was changed when we release the lock */ + * was changed when we release the lock + */ if (rtr_nid != lp->lp_nid) { cpt2 = lnet_cpt_of_nid_locked(lp->lp_nid); if (cpt2 != cpt) { - if (src_ni != NULL) + if (src_ni) lnet_ni_decref_locked(src_ni, cpt); lnet_net_unlock(cpt); @@ -1304,7 +1384,7 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid) libcfs_nid2str(dst_nid), libcfs_nid2str(lp->lp_nid), lnet_msgtyp2str(msg->msg_type), msg->msg_len); - if (src_ni == NULL) { + if (!src_ni) { src_ni = lp->lp_ni; src_nid = src_ni->ni_nid; } else { @@ -1324,30 +1404,30 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid) msg->msg_target_is_router = 1; msg->msg_target.nid = lp->lp_nid; - msg->msg_target.pid = LUSTRE_SRV_LNET_PID; + msg->msg_target.pid = LNET_PID_LUSTRE; } /* 'lp' is our best choice of peer */ LASSERT(!msg->msg_peertxcredit); LASSERT(!msg->msg_txcredit); - LASSERT(msg->msg_txpeer == NULL); + LASSERT(!msg->msg_txpeer); msg->msg_txpeer = lp; /* msg takes my ref on lp */ rc = lnet_post_send_locked(msg, 0); lnet_net_unlock(cpt); - if (rc == EHOSTUNREACH || rc == ECANCELED) - return -rc; + if (rc < 0) + return rc; - if (rc == 0) + if (rc == LNET_CREDIT_OK) lnet_ni_send(src_ni, msg); - return 0; /* rc == 0 or EAGAIN */ + return 0; /* rc == LNET_CREDIT_OK or LNET_CREDIT_WAIT */ } -static void +void lnet_drop_message(lnet_ni_t *ni, int cpt, void *private, unsigned int nob) { lnet_net_lock(cpt); @@ -1363,15 +1443,17 @@ lnet_recv_put(lnet_ni_t *ni, lnet_msg_t *msg) { lnet_hdr_t *hdr = &msg->msg_hdr; - if (msg->msg_wanted != 0) + if (msg->msg_wanted) lnet_setpayloadbuffer(msg); lnet_build_msg_event(msg, LNET_EVENT_PUT); - /* Must I ACK? If so I'll grab the ack_wmd out of the header and put - * it back into the ACK during lnet_finalize() */ - msg->msg_ack = (!lnet_is_wire_handle_none(&hdr->msg.put.ack_wmd) && - (msg->msg_md->md_options & LNET_MD_ACK_DISABLE) == 0); + /* + * Must I ACK? 
If so I'll grab the ack_wmd out of the header and put + * it back into the ACK during lnet_finalize() + */ + msg->msg_ack = !lnet_is_wire_handle_none(&hdr->msg.put.ack_wmd) && + !(msg->msg_md->md_options & LNET_MD_ACK_DISABLE); lnet_ni_recv(ni, msg->msg_private, msg, msg->msg_rx_delayed, msg->msg_offset, msg->msg_wanted, hdr->payload_length); @@ -1382,6 +1464,7 @@ lnet_parse_put(lnet_ni_t *ni, lnet_msg_t *msg) { lnet_hdr_t *hdr = &msg->msg_hdr; struct lnet_match_info info; + bool ready_delay; int rc; /* Convert put fields to host byte order */ @@ -1397,7 +1480,8 @@ lnet_parse_put(lnet_ni_t *ni, lnet_msg_t *msg) info.mi_roffset = hdr->msg.put.offset; info.mi_mbits = hdr->msg.put.match_bits; - msg->msg_rx_ready_delay = ni->ni_lnd->lnd_eager_recv == NULL; + msg->msg_rx_ready_delay = !ni->ni_lnd->lnd_eager_recv; + ready_delay = msg->msg_rx_ready_delay; again: rc = lnet_ptl_match_md(&info, msg); @@ -1410,12 +1494,18 @@ lnet_parse_put(lnet_ni_t *ni, lnet_msg_t *msg) return 0; case LNET_MATCHMD_NONE: - if (msg->msg_rx_delayed) /* attached on delayed list */ + /** + * no eager_recv or has already called it, should + * have been attached on delayed list + */ + if (ready_delay) return 0; rc = lnet_ni_eager_recv(ni, msg); - if (rc == 0) + if (!rc) { + ready_delay = true; goto again; + } /* fall through */ case LNET_MATCHMD_DROP: @@ -1423,7 +1513,7 @@ lnet_parse_put(lnet_ni_t *ni, lnet_msg_t *msg) libcfs_id2str(info.mi_id), info.mi_portal, info.mi_mbits, info.mi_roffset, info.mi_rlength, rc); - return ENOENT; /* +ve: OK but no match */ + return -ENOENT; /* -ve: OK but no match */ } } @@ -1454,7 +1544,7 @@ lnet_parse_get(lnet_ni_t *ni, lnet_msg_t *msg, int rdma_get) CNETERR("Dropping GET from %s portal %d match %llu offset %d length %d\n", libcfs_id2str(info.mi_id), info.mi_portal, info.mi_mbits, info.mi_roffset, info.mi_rlength); - return ENOENT; /* +ve: OK but no match */ + return -ENOENT; /* -ve: OK but no match */ } LASSERT(rc == LNET_MATCHMD_OK); @@ -1510,33 +1600,33 @@ lnet_parse_reply(lnet_ni_t *ni, lnet_msg_t *msg) /* NB handles only looked up by creator (no flips) */ md = lnet_wire_handle2md(&hdr->msg.reply.dst_wmd); - if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) { + if (!md || !md->md_threshold || md->md_me) { CNETERR("%s: Dropping REPLY from %s for %s MD %#llx.%#llx\n", libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - (md == NULL) ? "invalid" : "inactive", + !md ? 
"invalid" : "inactive", hdr->msg.reply.dst_wmd.wh_interface_cookie, hdr->msg.reply.dst_wmd.wh_object_cookie); - if (md != NULL && md->md_me != NULL) + if (md && md->md_me) CERROR("REPLY MD also attached to portal %d\n", md->md_me->me_portal); lnet_res_unlock(cpt); - return ENOENT; /* +ve: OK but no match */ + return -ENOENT; /* -ve: OK but no match */ } - LASSERT(md->md_offset == 0); + LASSERT(!md->md_offset); rlength = hdr->payload_length; mlength = min_t(uint, rlength, md->md_length); if (mlength < rlength && - (md->md_options & LNET_MD_TRUNCATE) == 0) { + !(md->md_options & LNET_MD_TRUNCATE)) { CNETERR("%s: Dropping REPLY from %s length %d for MD %#llx would overflow (%d)\n", libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), rlength, hdr->msg.reply.dst_wmd.wh_object_cookie, mlength); lnet_res_unlock(cpt); - return ENOENT; /* +ve: OK but no match */ + return -ENOENT; /* -ve: OK but no match */ } CDEBUG(D_NET, "%s: Reply from %s of length %d/%d into md %#llx\n", @@ -1545,7 +1635,7 @@ lnet_parse_reply(lnet_ni_t *ni, lnet_msg_t *msg) lnet_msg_attach_md(msg, md, 0, mlength); - if (mlength != 0) + if (mlength) lnet_setpayloadbuffer(msg); lnet_res_unlock(cpt); @@ -1576,20 +1666,20 @@ lnet_parse_ack(lnet_ni_t *ni, lnet_msg_t *msg) /* NB handles only looked up by creator (no flips) */ md = lnet_wire_handle2md(&hdr->msg.ack.dst_wmd); - if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) { + if (!md || !md->md_threshold || md->md_me) { /* Don't moan; this is expected */ CDEBUG(D_NET, "%s: Dropping ACK from %s to %s MD %#llx.%#llx\n", libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - (md == NULL) ? "invalid" : "inactive", + !md ? "invalid" : "inactive", hdr->msg.ack.dst_wmd.wh_interface_cookie, hdr->msg.ack.dst_wmd.wh_object_cookie); - if (md != NULL && md->md_me != NULL) + if (md && md->md_me) CERROR("Source MD also attached to portal %d\n", md->md_me->me_portal); lnet_res_unlock(cpt); - return ENOENT; /* +ve! */ + return -ENOENT; /* -ve! 
*/ } CDEBUG(D_NET, "%s: ACK from %s into md %#llx\n", @@ -1606,14 +1696,22 @@ lnet_parse_ack(lnet_ni_t *ni, lnet_msg_t *msg) return 0; } -static int +/** + * \retval LNET_CREDIT_OK If \a msg is forwarded + * \retval LNET_CREDIT_WAIT If \a msg is blocked because w/o buffer + * \retval -ve error code + */ +int lnet_parse_forward_locked(lnet_ni_t *ni, lnet_msg_t *msg) { int rc = 0; + if (!the_lnet.ln_routing) + return -ECANCELED; + if (msg->msg_rxpeer->lp_rtrcredits <= 0 || lnet_msg2bufpool(msg)->rbp_credits <= 0) { - if (ni->ni_lnd->lnd_eager_recv == NULL) { + if (!ni->ni_lnd->lnd_eager_recv) { msg->msg_rx_ready_delay = 1; } else { lnet_net_unlock(msg->msg_rx_cpt); @@ -1622,11 +1720,38 @@ lnet_parse_forward_locked(lnet_ni_t *ni, lnet_msg_t *msg) } } - if (rc == 0) + if (!rc) rc = lnet_post_routed_recv_locked(msg, 0); return rc; } +int +lnet_parse_local(lnet_ni_t *ni, lnet_msg_t *msg) +{ + int rc; + + switch (msg->msg_type) { + case LNET_MSG_ACK: + rc = lnet_parse_ack(ni, msg); + break; + case LNET_MSG_PUT: + rc = lnet_parse_put(ni, msg); + break; + case LNET_MSG_GET: + rc = lnet_parse_get(ni, msg, msg->msg_rdma_get); + break; + case LNET_MSG_REPLY: + rc = lnet_parse_reply(ni, msg); + break; + default: /* prevent an unused label if !kernel */ + LASSERT(0); + return -EPROTO; + } + + LASSERT(!rc || rc == -ENOENT); + return rc; +} + char * lnet_msgtyp2str(int type) { @@ -1702,7 +1827,6 @@ lnet_print_hdr(lnet_hdr_t *hdr) hdr->msg.reply.dst_wmd.wh_object_cookie, hdr->payload_length); } - } int @@ -1765,20 +1889,20 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid, if (the_lnet.ln_routing && ni->ni_last_alive != ktime_get_real_seconds()) { - lnet_ni_lock(ni); - /* NB: so far here is the only place to set NI status to "up */ + lnet_ni_lock(ni); ni->ni_last_alive = ktime_get_real_seconds(); - if (ni->ni_status != NULL && + if (ni->ni_status && ni->ni_status->ns_status == LNET_NI_STATUS_DOWN) ni->ni_status->ns_status = LNET_NI_STATUS_UP; lnet_ni_unlock(ni); } - /* Regard a bad destination NID as a protocol error. Senders should + /* + * Regard a bad destination NID as a protocol error. Senders should * know what they're doing; if they don't they're misconfigured, buggy - * or malicious so we chop them off at the knees :) */ - + * or malicious so we chop them off at the knees :) + */ if (!for_me) { if (LNET_NIDNET(dest_nid) == LNET_NIDNET(ni->ni_nid)) { /* should have gone direct */ @@ -1790,8 +1914,10 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid, } if (lnet_islocalnid(dest_nid)) { - /* dest is another local NI; sender should have used - * this node's NID on its own network */ + /* + * dest is another local NI; sender should have used + * this node's NID on its own network + */ CERROR("%s, src %s: Bad dest nid %s (it's my nid but on a different network)\n", libcfs_nid2str(from_nid), libcfs_nid2str(src_nid), @@ -1816,9 +1942,10 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid, } } - /* Message looks OK; we're not going to return an error, so we MUST - * call back lnd_recv() come what may... */ - + /* + * Message looks OK; we're not going to return an error, so we MUST + * call back lnd_recv() come what may... + */ if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */ fail_peer(src_nid, 0)) { /* shall we now? 
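 * (fail_peer() is the long-standing test hook; the ln_drop_rules
 * check just below adds rule-driven loss simulation via
 * lnet_drop_rule_match(), and ln_delay_rules, matched after
 * lnet_msg_commit(), simulates latency.)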
*/ CERROR("%s, src %s: Dropping %s to simulate failure\n", @@ -1827,8 +1954,16 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid, goto drop; } + if (!list_empty(&the_lnet.ln_drop_rules) && + lnet_drop_rule_match(hdr)) { + CDEBUG(D_NET, "%s, src %s, dst %s: Dropping %s to simulate silent message loss\n", + libcfs_nid2str(from_nid), libcfs_nid2str(src_nid), + libcfs_nid2str(dest_nid), lnet_msgtyp2str(type)); + goto drop; + } + msg = lnet_msg_alloc(); - if (msg == NULL) { + if (!msg) { CERROR("%s, src %s: Dropping %s (out of memory)\n", libcfs_nid2str(from_nid), libcfs_nid2str(src_nid), lnet_msgtyp2str(type)); @@ -1838,11 +1973,12 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid, /* msg zeroed in lnet_msg_alloc; * i.e. flags all clear, pointers NULL etc */ - msg->msg_type = type; msg->msg_private = private; msg->msg_receiving = 1; - msg->msg_len = msg->msg_wanted = payload_length; + msg->msg_rdma_get = rdma_req; + msg->msg_wanted = payload_length; + msg->msg_len = payload_length; msg->msg_offset = 0; msg->msg_hdr = *hdr; /* for building message event */ @@ -1864,7 +2000,7 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid, lnet_net_lock(cpt); rc = lnet_nid2peer_locked(&msg->msg_rxpeer, from_nid, cpt); - if (rc != 0) { + if (rc) { lnet_net_unlock(cpt); CERROR("%s, src %s: Dropping %s (error %d looking up sender)\n", libcfs_nid2str(from_nid), libcfs_nid2str(src_nid), @@ -1888,13 +2024,21 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid, lnet_msg_commit(msg, cpt); + /* message delay simulation */ + if (unlikely(!list_empty(&the_lnet.ln_delay_rules) && + lnet_delay_rule_match_locked(hdr, msg))) { + lnet_net_unlock(cpt); + return 0; + } + if (!for_me) { rc = lnet_parse_forward_locked(ni, msg); lnet_net_unlock(cpt); if (rc < 0) goto free_drop; - if (rc == 0) { + + if (rc == LNET_CREDIT_OK) { lnet_ni_recv(ni, msg->msg_private, msg, 0, 0, payload_length, payload_length); } @@ -1903,32 +2047,13 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid, lnet_net_unlock(cpt); - switch (type) { - case LNET_MSG_ACK: - rc = lnet_parse_ack(ni, msg); - break; - case LNET_MSG_PUT: - rc = lnet_parse_put(ni, msg); - break; - case LNET_MSG_GET: - rc = lnet_parse_get(ni, msg, rdma_req); - break; - case LNET_MSG_REPLY: - rc = lnet_parse_reply(ni, msg); - break; - default: - LASSERT(0); - rc = -EPROTO; - goto free_drop; /* prevent an unused label if !kernel */ - } - - if (rc == 0) - return 0; - - LASSERT(rc == ENOENT); + rc = lnet_parse_local(ni, msg); + if (rc) + goto free_drop; + return 0; free_drop: - LASSERT(msg->msg_md == NULL); + LASSERT(!msg->msg_md); lnet_finalize(ni, msg, rc); drop: @@ -1950,9 +2075,9 @@ lnet_drop_delayed_msg_list(struct list_head *head, char *reason) id.nid = msg->msg_hdr.src_nid; id.pid = msg->msg_hdr.src_pid; - LASSERT(msg->msg_md == NULL); + LASSERT(!msg->msg_md); LASSERT(msg->msg_rx_delayed); - LASSERT(msg->msg_rxpeer != NULL); + LASSERT(msg->msg_rxpeer); LASSERT(msg->msg_hdr.type == LNET_MSG_PUT); CWARN("Dropping delayed PUT from %s portal %d match %llu offset %d length %d: %s\n", @@ -1962,10 +2087,11 @@ lnet_drop_delayed_msg_list(struct list_head *head, char *reason) msg->msg_hdr.msg.put.offset, msg->msg_hdr.payload_length, reason); - /* NB I can't drop msg's ref on msg_rxpeer until after I've + /* + * NB I can't drop msg's ref on msg_rxpeer until after I've * called lnet_drop_message(), so I just hang onto msg as well - * until that's done */ - + * until that's done + */ 
lnet_drop_message(msg->msg_rxpeer->lp_ni, msg->msg_rxpeer->lp_cpt, msg->msg_private, msg->msg_len); @@ -1988,15 +2114,16 @@ lnet_recv_delayed_msg_list(struct list_head *head) msg = list_entry(head->next, lnet_msg_t, msg_list); list_del(&msg->msg_list); - /* md won't disappear under me, since each msg - * holds a ref on it */ - + /* + * md won't disappear under me, since each msg + * holds a ref on it + */ id.nid = msg->msg_hdr.src_nid; id.pid = msg->msg_hdr.src_pid; LASSERT(msg->msg_rx_delayed); - LASSERT(msg->msg_md != NULL); - LASSERT(msg->msg_rxpeer != NULL); + LASSERT(msg->msg_md); + LASSERT(msg->msg_rxpeer); LASSERT(msg->msg_hdr.type == LNET_MSG_PUT); CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d match %llu offset %d length %d.\n", @@ -2064,7 +2191,6 @@ LNetPut(lnet_nid_t self, lnet_handle_md_t mdh, lnet_ack_req_t ack, int cpt; int rc; - LASSERT(the_lnet.ln_init); LASSERT(the_lnet.ln_refcount > 0); if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */ @@ -2075,7 +2201,7 @@ LNetPut(lnet_nid_t self, lnet_handle_md_t mdh, lnet_ack_req_t ack, } msg = lnet_msg_alloc(); - if (msg == NULL) { + if (!msg) { CERROR("Dropping PUT to %s: ENOMEM on lnet_msg_t\n", libcfs_id2str(target)); return -ENOMEM; @@ -2086,11 +2212,11 @@ LNetPut(lnet_nid_t self, lnet_handle_md_t mdh, lnet_ack_req_t ack, lnet_res_lock(cpt); md = lnet_handle2md(&mdh); - if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) { + if (!md || !md->md_threshold || md->md_me) { CERROR("Dropping PUT (%llu:%d:%s): MD (%d) invalid\n", match_bits, portal, libcfs_id2str(target), - md == NULL ? -1 : md->md_threshold); - if (md != NULL && md->md_me != NULL) + !md ? -1 : md->md_threshold); + if (md && md->md_me) CERROR("Source MD also attached to portal %d\n", md->md_me->me_portal); lnet_res_unlock(cpt); @@ -2128,9 +2254,9 @@ LNetPut(lnet_nid_t self, lnet_handle_md_t mdh, lnet_ack_req_t ack, lnet_build_msg_event(msg, LNET_EVENT_SEND); rc = lnet_send(self, msg, LNET_NID_ANY); - if (rc != 0) { + if (rc) { CNETERR("Error sending PUT to %s: %d\n", - libcfs_id2str(target), rc); + libcfs_id2str(target), rc); lnet_finalize(NULL, msg, rc); } @@ -2142,13 +2268,14 @@ EXPORT_SYMBOL(LNetPut); lnet_msg_t * lnet_create_reply_msg(lnet_ni_t *ni, lnet_msg_t *getmsg) { - /* The LND can DMA direct to the GET md (i.e. no REPLY msg). This + /* + * The LND can DMA direct to the GET md (i.e. no REPLY msg). This * returns a msg for the LND to pass to lnet_finalize() when the sink * data has been received. 
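 *
 * Typical LND sequence (a sketch; assumes the LND elides the REPLY
 * by RDMA-ing the sink data directly into the GET MD):
 *
 *	reply = lnet_create_reply_msg(ni, getmsg);
 *	/* ... RDMA the sink data ... */
 *	lnet_set_reply_msg_len(ni, reply, nob);
 *	lnet_finalize(ni, reply, status);
 *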
* * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when - * lnet_finalize() is called on it, so the LND must call this first */ - + * lnet_finalize() is called on it, so the LND must call this first + */ struct lnet_msg *msg = lnet_msg_alloc(); struct lnet_libmd *getmd = getmsg->msg_md; lnet_process_id_t peer_id = getmsg->msg_target; @@ -2157,26 +2284,26 @@ lnet_create_reply_msg(lnet_ni_t *ni, lnet_msg_t *getmsg) LASSERT(!getmsg->msg_target_is_router); LASSERT(!getmsg->msg_routing); + if (!msg) { + CERROR("%s: Dropping REPLY from %s: can't allocate msg\n", + libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id)); + goto drop; + } + cpt = lnet_cpt_of_cookie(getmd->md_lh.lh_cookie); lnet_res_lock(cpt); LASSERT(getmd->md_refcount > 0); - if (msg == NULL) { - CERROR("%s: Dropping REPLY from %s: can't allocate msg\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id)); - goto drop; - } - - if (getmd->md_threshold == 0) { + if (!getmd->md_threshold) { CERROR("%s: Dropping REPLY from %s for inactive MD %p\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id), - getmd); + libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id), + getmd); lnet_res_unlock(cpt); goto drop; } - LASSERT(getmd->md_offset == 0); + LASSERT(!getmd->md_offset); CDEBUG(D_NET, "%s: Reply from %s md %p\n", libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id), getmd); @@ -2209,7 +2336,7 @@ lnet_create_reply_msg(lnet_ni_t *ni, lnet_msg_t *getmsg) the_lnet.ln_counters[cpt]->drop_length += getmd->md_length; lnet_net_unlock(cpt); - if (msg != NULL) + if (msg) lnet_msg_free(msg); return NULL; @@ -2219,14 +2346,18 @@ EXPORT_SYMBOL(lnet_create_reply_msg); void lnet_set_reply_msg_len(lnet_ni_t *ni, lnet_msg_t *reply, unsigned int len) { - /* Set the REPLY length, now the RDMA that elides the REPLY message has - * completed and I know it. */ - LASSERT(reply != NULL); + /* + * Set the REPLY length, now the RDMA that elides the REPLY message has + * completed and I know it. + */ + LASSERT(reply); LASSERT(reply->msg_type == LNET_MSG_GET); LASSERT(reply->msg_ev.type == LNET_EVENT_REPLY); - /* NB I trusted my peer to RDMA. If she tells me she's written beyond - * the end of my buffer, I might as well be dead. */ + /* + * NB I trusted my peer to RDMA. If she tells me she's written beyond + * the end of my buffer, I might as well be dead. + */ LASSERT(len <= reply->msg_ev.mlength); reply->msg_ev.mlength = len; @@ -2264,7 +2395,6 @@ LNetGet(lnet_nid_t self, lnet_handle_md_t mdh, int cpt; int rc; - LASSERT(the_lnet.ln_init); LASSERT(the_lnet.ln_refcount > 0); if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */ @@ -2275,7 +2405,7 @@ LNetGet(lnet_nid_t self, lnet_handle_md_t mdh, } msg = lnet_msg_alloc(); - if (msg == NULL) { + if (!msg) { CERROR("Dropping GET to %s: ENOMEM on lnet_msg_t\n", libcfs_id2str(target)); return -ENOMEM; @@ -2285,11 +2415,11 @@ LNetGet(lnet_nid_t self, lnet_handle_md_t mdh, lnet_res_lock(cpt); md = lnet_handle2md(&mdh); - if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) { + if (!md || !md->md_threshold || md->md_me) { CERROR("Dropping GET (%llu:%d:%s): MD (%d) invalid\n", match_bits, portal, libcfs_id2str(target), - md == NULL ? -1 : md->md_threshold); - if (md != NULL && md->md_me != NULL) + !md ? 
-1 : md->md_threshold); + if (md && md->md_me) CERROR("REPLY MD also attached to portal %d\n", md->md_me->me_portal); @@ -2323,7 +2453,7 @@ LNetGet(lnet_nid_t self, lnet_handle_md_t mdh, rc = lnet_send(self, msg, LNET_NID_ANY); if (rc < 0) { CNETERR("Error sending GET to %s: %d\n", - libcfs_id2str(target), rc); + libcfs_id2str(target), rc); lnet_finalize(NULL, msg, rc); } @@ -2358,12 +2488,12 @@ LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp) __u32 order = 2; struct list_head *rn_list; - /* if !local_nid_dist_zero, I don't return a distance of 0 ever + /* + * if !local_nid_dist_zero, I don't return a distance of 0 ever * (when lustre sees a distance of 0, it substitutes 0@lo), so I * keep order 0 free for 0@lo and order 1 free for a local NID - * match */ - - LASSERT(the_lnet.ln_init); + * match + */ LASSERT(the_lnet.ln_refcount > 0); cpt = lnet_net_lock_current(); @@ -2372,9 +2502,9 @@ LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp) ni = list_entry(e, lnet_ni_t, ni_list); if (ni->ni_nid == dstnid) { - if (srcnidp != NULL) + if (srcnidp) *srcnidp = dstnid; - if (orderp != NULL) { + if (orderp) { if (LNET_NETTYP(LNET_NIDNET(dstnid)) == LOLND) *orderp = 0; else @@ -2386,9 +2516,9 @@ LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp) } if (LNET_NIDNET(ni->ni_nid) == dstnet) { - if (srcnidp != NULL) + if (srcnidp) *srcnidp = ni->ni_nid; - if (orderp != NULL) + if (orderp) *orderp = order; lnet_net_unlock(cpt); return 1; @@ -2404,21 +2534,28 @@ LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp) if (rnet->lrn_net == dstnet) { lnet_route_t *route; lnet_route_t *shortest = NULL; + __u32 shortest_hops = LNET_UNDEFINED_HOPS; + __u32 route_hops; LASSERT(!list_empty(&rnet->lrn_routes)); list_for_each_entry(route, &rnet->lrn_routes, - lr_list) { - if (shortest == NULL || - route->lr_hops < shortest->lr_hops) + lr_list) { + route_hops = route->lr_hops; + if (route_hops == LNET_UNDEFINED_HOPS) + route_hops = 1; + if (!shortest || + route_hops < shortest_hops) { shortest = route; + shortest_hops = route_hops; + } } - LASSERT(shortest != NULL); - hops = shortest->lr_hops; - if (srcnidp != NULL) + LASSERT(shortest); + hops = shortest_hops; + if (srcnidp) *srcnidp = shortest->lr_gateway->lp_ni->ni_nid; - if (orderp != NULL) + if (orderp) *orderp = order; lnet_net_unlock(cpt); return hops + 1; diff --git a/drivers/staging/lustre/lnet/lnet/lib-msg.c b/drivers/staging/lustre/lnet/lnet/lib-msg.c index 43977e8df..f879d7f28 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-msg.c +++ b/drivers/staging/lustre/lnet/lnet/lib-msg.c @@ -74,7 +74,6 @@ lnet_build_msg_event(lnet_msg_t *msg, lnet_event_kind_t ev_type) ev->initiator.nid = LNET_NID_ANY; ev->initiator.pid = the_lnet.ln_pid; ev->sender = LNET_NID_ANY; - } else { /* event for passive message */ ev->target.pid = hdr->dest_pid; @@ -173,7 +172,7 @@ lnet_msg_decommit_tx(lnet_msg_t *msg, int status) lnet_event_t *ev = &msg->msg_ev; LASSERT(msg->msg_tx_committed); - if (status != 0) + if (status) goto out; counters = the_lnet.ln_counters[msg->msg_tx_cpt]; @@ -181,7 +180,7 @@ lnet_msg_decommit_tx(lnet_msg_t *msg, int status) default: /* routed message */ LASSERT(msg->msg_routing); LASSERT(msg->msg_rx_committed); - LASSERT(ev->type == 0); + LASSERT(!ev->type); counters->route_length += msg->msg_len; counters->route_count++; @@ -203,8 +202,10 @@ lnet_msg_decommit_tx(lnet_msg_t *msg, int status) case LNET_EVENT_GET: LASSERT(msg->msg_rx_committed); - /* overwritten while sending reply, we should never be - * here 
for optimized GET */ + /* + * overwritten while sending reply, we should never be + * here for optimized GET + */ LASSERT(msg->msg_type == LNET_MSG_REPLY); msg->msg_type = LNET_MSG_GET; /* fix type */ break; @@ -225,13 +226,13 @@ lnet_msg_decommit_rx(lnet_msg_t *msg, int status) LASSERT(!msg->msg_tx_committed); /* decommitted or never committed */ LASSERT(msg->msg_rx_committed); - if (status != 0) + if (status) goto out; counters = the_lnet.ln_counters[msg->msg_rx_cpt]; switch (ev->type) { default: - LASSERT(ev->type == 0); + LASSERT(!ev->type); LASSERT(msg->msg_routing); goto out; @@ -240,10 +241,12 @@ lnet_msg_decommit_rx(lnet_msg_t *msg, int status) break; case LNET_EVENT_GET: - /* type is "REPLY" if it's an optimized GET on passive side, + /* + * type is "REPLY" if it's an optimized GET on passive side, * because optimized GET will never be committed for sending, * so message type wouldn't be changed back to "GET" by - * lnet_msg_decommit_tx(), see details in lnet_parse_get() */ + * lnet_msg_decommit_tx(), see details in lnet_parse_get() + */ LASSERT(msg->msg_type == LNET_MSG_REPLY || msg->msg_type == LNET_MSG_GET); counters->send_length += msg->msg_wanted; @@ -254,8 +257,10 @@ lnet_msg_decommit_rx(lnet_msg_t *msg, int status) break; case LNET_EVENT_REPLY: - /* type is "GET" if it's an optimized GET on active side, - * see details in lnet_create_reply_msg() */ + /* + * type is "GET" if it's an optimized GET on active side, + * see details in lnet_create_reply_msg() + */ LASSERT(msg->msg_type == LNET_MSG_GET || msg->msg_type == LNET_MSG_REPLY); break; @@ -309,10 +314,12 @@ lnet_msg_attach_md(lnet_msg_t *msg, lnet_libmd_t *md, unsigned int offset, unsigned int mlen) { /* NB: @offset and @len are only useful for receiving */ - /* Here, we attach the MD on lnet_msg and mark it busy and + /* + * Here, we attach the MD on lnet_msg and mark it busy and * decrementing its threshold. Come what may, the lnet_msg "owns" * the MD until a call to lnet_msg_detach_md or lnet_finalize() - * signals completion. */ + * signals completion. 
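 *
 * Put differently (summary only): the reference taken here is
 * dropped in lnet_msg_detach_md(), and that is also where an
 * exhausted auto-unlink MD is finally released via lnet_md_unlink().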
+ */ LASSERT(!msg->msg_routing); msg->msg_md = md; @@ -343,7 +350,7 @@ lnet_msg_detach_md(lnet_msg_t *msg, int status) LASSERT(md->md_refcount >= 0); unlink = lnet_md_unlinkable(md); - if (md->md_eq != NULL) { + if (md->md_eq) { msg->msg_ev.status = status; msg->msg_ev.unlinked = unlink; lnet_eq_enqueue_event(md->md_eq, &msg->msg_ev); @@ -364,7 +371,7 @@ lnet_complete_msg_locked(lnet_msg_t *msg, int cpt) LASSERT(msg->msg_onactivelist); - if (status == 0 && msg->msg_ack) { + if (!status && msg->msg_ack) { /* Only send an ACK if the PUT completed successfully */ lnet_msg_decommit(msg, cpt, 0); @@ -383,8 +390,10 @@ lnet_complete_msg_locked(lnet_msg_t *msg, int cpt) msg->msg_hdr.msg.ack.match_bits = msg->msg_ev.match_bits; msg->msg_hdr.msg.ack.mlength = cpu_to_le32(msg->msg_ev.mlength); - /* NB: we probably want to use NID of msg::msg_from as 3rd - * parameter (router NID) if it's routed message */ + /* + * NB: we probably want to use NID of msg::msg_from as 3rd + * parameter (router NID) if it's routed message + */ rc = lnet_send(msg->msg_ev.target.nid, msg, LNET_NID_ANY); lnet_net_lock(cpt); @@ -401,7 +410,7 @@ lnet_complete_msg_locked(lnet_msg_t *msg, int cpt) */ return rc; - } else if (status == 0 && /* OK so far */ + } else if (!status && /* OK so far */ (msg->msg_routing && !msg->msg_sending)) { /* not forwarded */ LASSERT(!msg->msg_receiving); /* called back recv already */ @@ -442,7 +451,7 @@ lnet_finalize(lnet_ni_t *ni, lnet_msg_t *msg, int status) LASSERT(!in_interrupt()); - if (msg == NULL) + if (!msg) return; #if 0 CDEBUG(D_WARNING, "%s msg->%s Flags:%s%s%s%s%s%s%s%s%s%s%s txp %s rxp %s\n", @@ -458,12 +467,12 @@ lnet_finalize(lnet_ni_t *ni, lnet_msg_t *msg, int status) msg->msg_rtrcredit ? "F" : "", msg->msg_peerrtrcredit ? "f" : "", msg->msg_onactivelist ? "!" : "", - msg->msg_txpeer == NULL ? "<none>" : libcfs_nid2str(msg->msg_txpeer->lp_nid), - msg->msg_rxpeer == NULL ? "<none>" : libcfs_nid2str(msg->msg_rxpeer->lp_nid)); + !msg->msg_txpeer ? "<none>" : libcfs_nid2str(msg->msg_txpeer->lp_nid), + !msg->msg_rxpeer ? "<none>" : libcfs_nid2str(msg->msg_rxpeer->lp_nid)); #endif msg->msg_ev.status = status; - if (msg->msg_md != NULL) { + if (msg->msg_md) { cpt = lnet_cpt_of_cookie(msg->msg_md->md_lh.lh_cookie); lnet_res_lock(cpt); @@ -491,15 +500,16 @@ lnet_finalize(lnet_ni_t *ni, lnet_msg_t *msg, int status) container = the_lnet.ln_msg_containers[cpt]; list_add_tail(&msg->msg_list, &container->msc_finalizing); - /* Recursion breaker. Don't complete the message here if I am (or - * enough other threads are) already completing messages */ - + /* + * Recursion breaker. 
Don't complete the message here if I am (or + * enough other threads are) already completing messages + */ my_slot = -1; for (i = 0; i < container->msc_nfinalizers; i++) { if (container->msc_finalizers[i] == current) break; - if (my_slot < 0 && container->msc_finalizers[i] == NULL) + if (my_slot < 0 && !container->msc_finalizers[i]) my_slot = i; } @@ -512,21 +522,29 @@ lnet_finalize(lnet_ni_t *ni, lnet_msg_t *msg, int status) while (!list_empty(&container->msc_finalizing)) { msg = list_entry(container->msc_finalizing.next, - lnet_msg_t, msg_list); + lnet_msg_t, msg_list); list_del(&msg->msg_list); - /* NB drops and regains the lnet lock if it actually does - * anything, so my finalizing friends can chomp along too */ + /* + * NB drops and regains the lnet lock if it actually does + * anything, so my finalizing friends can chomp along too + */ rc = lnet_complete_msg_locked(msg, cpt); - if (rc != 0) + if (rc) break; } + if (unlikely(!list_empty(&the_lnet.ln_delay_rules))) { + lnet_net_unlock(cpt); + lnet_delay_rule_check(); + lnet_net_lock(cpt); + } + container->msc_finalizers[my_slot] = NULL; lnet_net_unlock(cpt); - if (rc != 0) + if (rc) goto again; } EXPORT_SYMBOL(lnet_finalize); @@ -536,12 +554,12 @@ lnet_msg_container_cleanup(struct lnet_msg_container *container) { int count = 0; - if (container->msc_init == 0) + if (!container->msc_init) return; while (!list_empty(&container->msc_active)) { lnet_msg_t *msg = list_entry(container->msc_active.next, - lnet_msg_t, msg_activelist); + lnet_msg_t, msg_activelist); LASSERT(msg->msg_onactivelist); msg->msg_onactivelist = 0; @@ -553,41 +571,23 @@ lnet_msg_container_cleanup(struct lnet_msg_container *container) if (count > 0) CERROR("%d active msg on exit\n", count); - if (container->msc_finalizers != NULL) { + if (container->msc_finalizers) { LIBCFS_FREE(container->msc_finalizers, container->msc_nfinalizers * sizeof(*container->msc_finalizers)); container->msc_finalizers = NULL; } -#ifdef LNET_USE_LIB_FREELIST - lnet_freelist_fini(&container->msc_freelist); -#endif container->msc_init = 0; } int lnet_msg_container_setup(struct lnet_msg_container *container, int cpt) { - int rc; - container->msc_init = 1; INIT_LIST_HEAD(&container->msc_active); INIT_LIST_HEAD(&container->msc_finalizing); -#ifdef LNET_USE_LIB_FREELIST - memset(&container->msc_freelist, 0, sizeof(lnet_freelist_t)); - - rc = lnet_freelist_init(&container->msc_freelist, - LNET_FL_MAX_MSGS, sizeof(lnet_msg_t)); - if (rc != 0) { - CERROR("Failed to init freelist for message container\n"); - lnet_msg_container_cleanup(container); - return rc; - } -#else - rc = 0; -#endif /* number of CPUs */ container->msc_nfinalizers = cfs_cpt_weight(lnet_cpt_table(), cpt); @@ -595,13 +595,13 @@ lnet_msg_container_setup(struct lnet_msg_container *container, int cpt) container->msc_nfinalizers * sizeof(*container->msc_finalizers)); - if (container->msc_finalizers == NULL) { + if (!container->msc_finalizers) { CERROR("Failed to allocate message finalizers\n"); lnet_msg_container_cleanup(container); return -ENOMEM; } - return rc; + return 0; } void @@ -610,7 +610,7 @@ lnet_msg_containers_destroy(void) struct lnet_msg_container *container; int i; - if (the_lnet.ln_msg_containers == NULL) + if (!the_lnet.ln_msg_containers) return; cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers) @@ -630,14 +630,14 @@ lnet_msg_containers_create(void) the_lnet.ln_msg_containers = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*container)); - if (the_lnet.ln_msg_containers == NULL) { + if (!the_lnet.ln_msg_containers) 
{ CERROR("Failed to allocate cpu-partition data for network\n"); return -ENOMEM; } cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers) { rc = lnet_msg_container_setup(container, i); - if (rc != 0) { + if (rc) { lnet_msg_containers_destroy(); return rc; } diff --git a/drivers/staging/lustre/lnet/lnet/lib-ptl.c b/drivers/staging/lustre/lnet/lnet/lib-ptl.c index bd7b071b2..3947e8b71 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-ptl.c +++ b/drivers/staging/lustre/lnet/lnet/lib-ptl.c @@ -13,11 +13,6 @@ * General Public License version 2 for more details (a copy is included * in the LICENSE file that accompanied this code). * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA - * * GPL HEADER END */ /* @@ -50,7 +45,7 @@ lnet_ptl_match_type(unsigned int index, lnet_process_id_t match_id, struct lnet_portal *ptl = the_lnet.ln_portals[index]; int unique; - unique = ignore_bits == 0 && + unique = !ignore_bits && match_id.nid != LNET_NID_ANY && match_id.pid != LNET_PID_ANY; @@ -139,8 +134,10 @@ static int lnet_try_match_md(lnet_libmd_t *md, struct lnet_match_info *info, struct lnet_msg *msg) { - /* ALWAYS called holding the lnet_res_lock, and can't lnet_res_unlock; - * lnet_match_blocked_msg() relies on this to avoid races */ + /* + * ALWAYS called holding the lnet_res_lock, and can't lnet_res_unlock; + * lnet_match_blocked_msg() relies on this to avoid races + */ unsigned int offset; unsigned int mlength; lnet_me_t *me = md->md_me; @@ -150,7 +147,7 @@ lnet_try_match_md(lnet_libmd_t *md, return LNET_MATCHMD_NONE | LNET_MATCHMD_EXHAUSTED; /* mismatched MD op */ - if ((md->md_options & info->mi_opc) == 0) + if (!(md->md_options & info->mi_opc)) return LNET_MATCHMD_NONE; /* mismatched ME nid/pid? */ @@ -163,17 +160,17 @@ lnet_try_match_md(lnet_libmd_t *md, return LNET_MATCHMD_NONE; /* mismatched ME matchbits? */ - if (((me->me_match_bits ^ info->mi_mbits) & ~me->me_ignore_bits) != 0) + if ((me->me_match_bits ^ info->mi_mbits) & ~me->me_ignore_bits) return LNET_MATCHMD_NONE; /* Hurrah! This _is_ a match; check it out... */ - if ((md->md_options & LNET_MD_MANAGE_REMOTE) == 0) + if (!(md->md_options & LNET_MD_MANAGE_REMOTE)) offset = md->md_offset; else offset = info->mi_roffset; - if ((md->md_options & LNET_MD_MAX_SIZE) != 0) { + if (md->md_options & LNET_MD_MAX_SIZE) { mlength = md->md_max_size; LASSERT(md->md_offset + mlength <= md->md_length); } else { @@ -182,7 +179,7 @@ lnet_try_match_md(lnet_libmd_t *md, if (info->mi_rlength <= mlength) { /* fits in allowed space */ mlength = info->mi_rlength; - } else if ((md->md_options & LNET_MD_TRUNCATE) == 0) { + } else if (!(md->md_options & LNET_MD_TRUNCATE)) { /* this packet _really_ is too big */ CERROR("Matching packet from %s, match %llu length %d too big: %d left, %d allowed\n", libcfs_id2str(info->mi_id), info->mi_mbits, @@ -203,10 +200,12 @@ lnet_try_match_md(lnet_libmd_t *md, if (!lnet_md_exhausted(md)) return LNET_MATCHMD_OK; - /* Auto-unlink NOW, so the ME gets unlinked if required. + /* + * Auto-unlink NOW, so the ME gets unlinked if required. * We bumped md->md_refcount above so the MD just gets flagged - * for unlink when it is finalized. */ - if ((md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) != 0) + * for unlink when it is finalized. 
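 *
 * (While md_refcount is still positive, lnet_md_unlink() only
 * detaches the ME and marks the MD a zombie; the memory itself is
 * reclaimed when the last message reference is dropped.)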
+ */ + if (md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) lnet_md_unlink(md); return LNET_MATCHMD_OK | LNET_MATCHMD_EXHAUSTED; @@ -239,7 +238,7 @@ lnet_mt_of_attach(unsigned int index, lnet_process_id_t id, ptl = the_lnet.ln_portals[index]; mtable = lnet_match2mt(ptl, id, mbits); - if (mtable != NULL) /* unique portal or only one match-table */ + if (mtable) /* unique portal or only one match-table */ return mtable; /* it's a wildcard portal */ @@ -248,8 +247,10 @@ lnet_mt_of_attach(unsigned int index, lnet_process_id_t id, return NULL; case LNET_INS_BEFORE: case LNET_INS_AFTER: - /* posted by no affinity thread, always hash to specific - * match-table to avoid buffer stealing which is heavy */ + /* + * posted by no affinity thread, always hash to specific + * match-table to avoid buffer stealing which is heavy + */ return ptl->ptl_mtables[ptl->ptl_index % LNET_CPT_NUMBER]; case LNET_INS_LOCAL: /* posted by cpu-affinity thread */ @@ -274,7 +275,7 @@ lnet_mt_of_match(struct lnet_match_info *info, struct lnet_msg *msg) LASSERT(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)); mtable = lnet_match2mt(ptl, info->mi_id, info->mi_mbits); - if (mtable != NULL) + if (mtable) return mtable; /* it's a wildcard portal */ @@ -298,10 +299,12 @@ lnet_mt_of_match(struct lnet_match_info *info, struct lnet_msg *msg) /* is there any active entry for this portal? */ nmaps = ptl->ptl_mt_nmaps; /* map to an active mtable to avoid heavy "stealing" */ - if (nmaps != 0) { - /* NB: there is possibility that ptl_mt_maps is being + if (nmaps) { + /* + * NB: there is possibility that ptl_mt_maps is being * changed because we are not under protection of - * lnet_ptl_lock, but it shouldn't hurt anything */ + * lnet_ptl_lock, but it shouldn't hurt anything + */ cpt = ptl->ptl_mt_maps[rotor % nmaps]; } } @@ -331,7 +334,7 @@ lnet_mt_test_exhausted(struct lnet_match_table *mtable, int pos) bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64]; pos &= (1 << LNET_MT_BITS_U64) - 1; - return ((*bmap) & (1ULL << pos)) != 0; + return (*bmap & (1ULL << pos)); } static void @@ -357,16 +360,15 @@ lnet_mt_match_head(struct lnet_match_table *mtable, lnet_process_id_t id, __u64 mbits) { struct lnet_portal *ptl = the_lnet.ln_portals[mtable->mt_portal]; + unsigned long hash = mbits; - if (lnet_ptl_is_wildcard(ptl)) { - return &mtable->mt_mhash[mbits & LNET_MT_HASH_MASK]; - } else { - unsigned long hash = mbits + id.nid + id.pid; + if (!lnet_ptl_is_wildcard(ptl)) { + hash += id.nid + id.pid; LASSERT(lnet_ptl_is_unique(ptl)); hash = hash_long(hash, LNET_MT_HASH_BITS); - return &mtable->mt_mhash[hash]; } + return &mtable->mt_mhash[hash & LNET_MT_HASH_MASK]; } int @@ -391,18 +393,20 @@ lnet_mt_match_md(struct lnet_match_table *mtable, list_for_each_entry_safe(me, tmp, head, me_list) { /* ME attached but MD not attached yet */ - if (me->me_md == NULL) + if (!me->me_md) continue; LASSERT(me == me->me_md->md_me); rc = lnet_try_match_md(me->me_md, info, msg); - if ((rc & LNET_MATCHMD_EXHAUSTED) == 0) + if (!(rc & LNET_MATCHMD_EXHAUSTED)) exhausted = 0; /* mlist is not empty */ - if ((rc & LNET_MATCHMD_FINISH) != 0) { - /* don't return EXHAUSTED bit because we don't know - * whether the mlist is empty or not */ + if (rc & LNET_MATCHMD_FINISH) { + /* + * don't return EXHAUSTED bit because we don't know + * whether the mlist is empty or not + */ return rc & ~LNET_MATCHMD_EXHAUSTED; } } @@ -413,7 +417,7 @@ lnet_mt_match_md(struct lnet_match_table *mtable, exhausted = 0; } - if (exhausted == 0 && head == &mtable->mt_mhash[LNET_MT_HASH_IGNORE]) { + if 
(!exhausted && head == &mtable->mt_mhash[LNET_MT_HASH_IGNORE]) { head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits); goto again; /* re-check MEs w/o ignore-bits */ } @@ -430,8 +434,10 @@ lnet_ptl_match_early(struct lnet_portal *ptl, struct lnet_msg *msg) { int rc; - /* message arrived before any buffer posting on this portal, - * simply delay or drop this message */ + /* + * message arrived before any buffer posting on this portal, + * simply delay or drop this message + */ if (likely(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl))) return 0; @@ -446,7 +452,7 @@ lnet_ptl_match_early(struct lnet_portal *ptl, struct lnet_msg *msg) if (msg->msg_rx_ready_delay) { msg->msg_rx_delayed = 1; list_add_tail(&msg->msg_list, - &ptl->ptl_msg_delayed); + &ptl->ptl_msg_delayed); } rc = LNET_MATCHMD_NONE; } else { @@ -465,9 +471,13 @@ lnet_ptl_match_delay(struct lnet_portal *ptl, int rc = 0; int i; - /* steal buffer from other CPTs, and delay it if nothing to steal, - * this function is more expensive than a regular match, but we - * don't expect it can happen a lot */ + /** + * Steal buffer from other CPTs, and delay msg if nothing to + * steal. This function is more expensive than a regular + * match, but we don't expect it can happen a lot. The return + * code contains one of LNET_MATCHMD_OK, LNET_MATCHMD_DROP, or + * LNET_MATCHMD_NONE. + */ LASSERT(lnet_ptl_is_wildcard(ptl)); for (i = 0; i < LNET_CPT_NUMBER; i++) { @@ -476,56 +486,77 @@ lnet_ptl_match_delay(struct lnet_portal *ptl, cpt = (first + i) % LNET_CPT_NUMBER; mtable = ptl->ptl_mtables[cpt]; - if (i != 0 && i != LNET_CPT_NUMBER - 1 && !mtable->mt_enabled) + if (i && i != LNET_CPT_NUMBER - 1 && !mtable->mt_enabled) continue; lnet_res_lock(cpt); lnet_ptl_lock(ptl); - if (i == 0) { /* the first try, attach on stealing list */ + if (!i) { + /* The first try, add to stealing list. */ list_add_tail(&msg->msg_list, - &ptl->ptl_msg_stealing); + &ptl->ptl_msg_stealing); } - if (!list_empty(&msg->msg_list)) { /* on stealing list */ + if (!list_empty(&msg->msg_list)) { + /* On stealing list. */ rc = lnet_mt_match_md(mtable, info, msg); - if ((rc & LNET_MATCHMD_EXHAUSTED) != 0 && + if ((rc & LNET_MATCHMD_EXHAUSTED) && mtable->mt_enabled) lnet_ptl_disable_mt(ptl, cpt); - if ((rc & LNET_MATCHMD_FINISH) != 0) + if (rc & LNET_MATCHMD_FINISH) { + /* Match found, remove from stealing list. */ + list_del_init(&msg->msg_list); + } else if (i == LNET_CPT_NUMBER - 1 || /* (1) */ + !ptl->ptl_mt_nmaps || /* (2) */ + (ptl->ptl_mt_nmaps == 1 && /* (3) */ + ptl->ptl_mt_maps[0] == cpt)) { + /** + * No match found, and this is either + * (1) the last cpt to check, or + * (2) there is no active cpt, or + * (3) this is the only active cpt. + * There is nothing to steal: delay or + * drop the message. + */ list_del_init(&msg->msg_list); - } else { - /* could be matched by lnet_ptl_attach_md() - * which is called by another thread */ - rc = msg->msg_md == NULL ? 
- LNET_MATCHMD_DROP : LNET_MATCHMD_OK; - } - - if (!list_empty(&msg->msg_list) && /* not matched yet */ - (i == LNET_CPT_NUMBER - 1 || /* the last CPT */ - ptl->ptl_mt_nmaps == 0 || /* no active CPT */ - (ptl->ptl_mt_nmaps == 1 && /* the only active CPT */ - ptl->ptl_mt_maps[0] == cpt))) { - /* nothing to steal, delay or drop */ - list_del_init(&msg->msg_list); - - if (lnet_ptl_is_lazy(ptl)) { - msg->msg_rx_delayed = 1; - list_add_tail(&msg->msg_list, - &ptl->ptl_msg_delayed); - rc = LNET_MATCHMD_NONE; + if (lnet_ptl_is_lazy(ptl)) { + msg->msg_rx_delayed = 1; + list_add_tail(&msg->msg_list, + &ptl->ptl_msg_delayed); + rc = LNET_MATCHMD_NONE; + } else { + rc = LNET_MATCHMD_DROP; + } } else { - rc = LNET_MATCHMD_DROP; + /* Do another iteration. */ + rc = 0; } + } else { + /** + * No longer on stealing list: another thread + * matched the message in lnet_ptl_attach_md(). + * We are now expected to handle the message. + */ + rc = !msg->msg_md ? + LNET_MATCHMD_DROP : LNET_MATCHMD_OK; } lnet_ptl_unlock(ptl); lnet_res_unlock(cpt); - if ((rc & LNET_MATCHMD_FINISH) != 0 || msg->msg_rx_delayed) + /** + * Note that test (1) above ensures that we always + * exit the loop through this break statement. + * + * LNET_MATCHMD_NONE means msg was added to the + * delayed queue, and we may no longer reference it + * after lnet_ptl_unlock() and lnet_res_unlock(). + */ + if (rc & (LNET_MATCHMD_FINISH | LNET_MATCHMD_NONE)) break; } @@ -551,7 +582,7 @@ lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg) ptl = the_lnet.ln_portals[info->mi_portal]; rc = lnet_ptl_match_early(ptl, msg); - if (rc != 0) /* matched or delayed early message */ + if (rc) /* matched or delayed early message */ return rc; mtable = lnet_mt_of_match(info, msg); @@ -563,13 +594,13 @@ lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg) } rc = lnet_mt_match_md(mtable, info, msg); - if ((rc & LNET_MATCHMD_EXHAUSTED) != 0 && mtable->mt_enabled) { + if ((rc & LNET_MATCHMD_EXHAUSTED) && mtable->mt_enabled) { lnet_ptl_lock(ptl); lnet_ptl_disable_mt(ptl, mtable->mt_cpt); lnet_ptl_unlock(ptl); } - if ((rc & LNET_MATCHMD_FINISH) != 0) /* matched or dropping */ + if (rc & LNET_MATCHMD_FINISH) /* matched or dropping */ goto out1; if (!msg->msg_rx_ready_delay) @@ -587,13 +618,14 @@ lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg) lnet_ptl_unlock(ptl); lnet_res_unlock(mtable->mt_cpt); - + rc = LNET_MATCHMD_NONE; } else { lnet_res_unlock(mtable->mt_cpt); rc = lnet_ptl_match_delay(ptl, info, msg); } - if (msg->msg_rx_delayed) { + /* LNET_MATCHMD_NONE means msg was added to the delay queue */ + if (rc & LNET_MATCHMD_NONE) { CDEBUG(D_NET, "Delaying %s from %s ptl %d MB %#llx off %d len %d\n", info->mi_opc == LNET_MD_OP_PUT ? "PUT" : "GET", @@ -630,7 +662,7 @@ lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md, int exhausted = 0; int cpt; - LASSERT(md->md_refcount == 0); /* a brand new MD */ + LASSERT(!md->md_refcount); /* a brand new MD */ me->me_md = md; md->md_me = me; @@ -664,15 +696,15 @@ lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md, rc = lnet_try_match_md(md, &info, msg); - exhausted = (rc & LNET_MATCHMD_EXHAUSTED) != 0; - if ((rc & LNET_MATCHMD_NONE) != 0) { + exhausted = (rc & LNET_MATCHMD_EXHAUSTED); + if (rc & LNET_MATCHMD_NONE) { if (exhausted) break; continue; } /* Hurrah! 
This _is_ a match */ - LASSERT((rc & LNET_MATCHMD_FINISH) != 0); + LASSERT(rc & LNET_MATCHMD_FINISH); list_del_init(&msg->msg_list); if (head == &ptl->ptl_msg_stealing) { @@ -682,7 +714,7 @@ lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md, continue; } - if ((rc & LNET_MATCHMD_OK) != 0) { + if (rc & LNET_MATCHMD_OK) { list_add_tail(&msg->msg_list, matches); CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d match %llu offset %d length %d.\n", @@ -717,7 +749,7 @@ lnet_ptl_cleanup(struct lnet_portal *ptl) struct lnet_match_table *mtable; int i; - if (ptl->ptl_mtables == NULL) /* uninitialized portal */ + if (!ptl->ptl_mtables) /* uninitialized portal */ return; LASSERT(list_empty(&ptl->ptl_msg_delayed)); @@ -727,7 +759,7 @@ lnet_ptl_cleanup(struct lnet_portal *ptl) lnet_me_t *me; int j; - if (mtable->mt_mhash == NULL) /* uninitialized match-table */ + if (!mtable->mt_mhash) /* uninitialized match-table */ continue; mhash = mtable->mt_mhash; @@ -735,7 +767,7 @@ lnet_ptl_cleanup(struct lnet_portal *ptl) for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++) { while (!list_empty(&mhash[j])) { me = list_entry(mhash[j].next, - lnet_me_t, me_list); + lnet_me_t, me_list); CERROR("Active ME %p on exit\n", me); list_del(&me->me_list); lnet_me_free(me); @@ -759,7 +791,7 @@ lnet_ptl_setup(struct lnet_portal *ptl, int index) ptl->ptl_mtables = cfs_percpt_alloc(lnet_cpt_table(), sizeof(struct lnet_match_table)); - if (ptl->ptl_mtables == NULL) { + if (!ptl->ptl_mtables) { CERROR("Failed to create match table for portal %d\n", index); return -ENOMEM; } @@ -772,7 +804,7 @@ lnet_ptl_setup(struct lnet_portal *ptl, int index) /* the extra entry is for MEs with ignore bits */ LIBCFS_CPT_ALLOC(mhash, lnet_cpt_table(), i, sizeof(*mhash) * (LNET_MT_HASH_SIZE + 1)); - if (mhash == NULL) { + if (!mhash) { CERROR("Failed to create match hash for portal %d\n", index); goto failed; @@ -800,7 +832,7 @@ lnet_portals_destroy(void) { int i; - if (the_lnet.ln_portals == NULL) + if (!the_lnet.ln_portals) return; for (i = 0; i < the_lnet.ln_nportals; i++) @@ -820,7 +852,7 @@ lnet_portals_create(void) the_lnet.ln_nportals = MAX_PORTALS; the_lnet.ln_portals = cfs_array_alloc(the_lnet.ln_nportals, size); - if (the_lnet.ln_portals == NULL) { + if (!the_lnet.ln_portals) { CERROR("Failed to allocate portals table\n"); return -ENOMEM; } @@ -886,17 +918,8 @@ LNetSetLazyPortal(int portal) } EXPORT_SYMBOL(LNetSetLazyPortal); -/** - * Turn off the lazy portal attribute. Delayed requests on the portal, - * if any, will be all dropped when this function returns. - * - * \param portal Index of the portal to disable the lazy attribute on. - * - * \retval 0 On success. - * \retval -EINVAL If \a portal is not a valid index. 
- */ int -LNetClearLazyPortal(int portal) +lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason) { struct lnet_portal *ptl; LIST_HEAD(zombies); @@ -915,21 +938,48 @@ LNetClearLazyPortal(int portal) return 0; } - if (the_lnet.ln_shutdown) - CWARN("Active lazy portal %d on exit\n", portal); - else - CDEBUG(D_NET, "clearing portal %d lazy\n", portal); + if (ni) { + struct lnet_msg *msg, *tmp; + + /* grab all messages which are on the NI passed in */ + list_for_each_entry_safe(msg, tmp, &ptl->ptl_msg_delayed, + msg_list) { + if (msg->msg_rxpeer->lp_ni == ni) + list_move(&msg->msg_list, &zombies); + } + } else { + if (the_lnet.ln_shutdown) + CWARN("Active lazy portal %d on exit\n", portal); + else + CDEBUG(D_NET, "clearing portal %d lazy\n", portal); - /* grab all the blocked messages atomically */ - list_splice_init(&ptl->ptl_msg_delayed, &zombies); + /* grab all the blocked messages atomically */ + list_splice_init(&ptl->ptl_msg_delayed, &zombies); - lnet_ptl_unsetopt(ptl, LNET_PTL_LAZY); + lnet_ptl_unsetopt(ptl, LNET_PTL_LAZY); + } lnet_ptl_unlock(ptl); lnet_res_unlock(LNET_LOCK_EX); - lnet_drop_delayed_msg_list(&zombies, "Clearing lazy portal attr"); + lnet_drop_delayed_msg_list(&zombies, reason); return 0; } + +/** + * Turn off the lazy portal attribute. Delayed requests on the portal, + * if any, will be all dropped when this function returns. + * + * \param portal Index of the portal to disable the lazy attribute on. + * + * \retval 0 On success. + * \retval -EINVAL If \a portal is not a valid index. + */ +int +LNetClearLazyPortal(int portal) +{ + return lnet_clear_lazy_portal(NULL, portal, + "Clearing lazy portal attr"); +} EXPORT_SYMBOL(LNetClearLazyPortal); diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c index 589ecc84d..891fd5940 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-socket.c +++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c @@ -64,7 +64,7 @@ lnet_sock_ioctl(int cmd, unsigned long arg) int rc; rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock); - if (rc != 0) { + if (rc) { CERROR("Can't create socket: %d\n", rc); return rc; } @@ -99,14 +99,17 @@ lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask) CLASSERT(sizeof(ifr.ifr_name) >= IFNAMSIZ); - strcpy(ifr.ifr_name, name); + if (strlen(name) > sizeof(ifr.ifr_name) - 1) + return -E2BIG; + strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); + rc = lnet_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr); - if (rc != 0) { + if (rc) { CERROR("Can't get flags for interface %s\n", name); return rc; } - if ((ifr.ifr_flags & IFF_UP) == 0) { + if (!(ifr.ifr_flags & IFF_UP)) { CDEBUG(D_NET, "Interface %s down\n", name); *up = 0; *ip = *mask = 0; @@ -114,10 +117,13 @@ lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask) } *up = 1; - strcpy(ifr.ifr_name, name); + if (strlen(name) > sizeof(ifr.ifr_name) - 1) + return -E2BIG; + strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); + ifr.ifr_addr.sa_family = AF_INET; rc = lnet_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr); - if (rc != 0) { + if (rc) { CERROR("Can't get IP address for interface %s\n", name); return rc; } @@ -125,10 +131,13 @@ lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask) val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr; *ip = ntohl(val); - strcpy(ifr.ifr_name, name); + if (strlen(name) > sizeof(ifr.ifr_name) - 1) + return -E2BIG; + strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); + ifr.ifr_addr.sa_family = AF_INET; rc = lnet_sock_ioctl(SIOCGIFNETMASK, (unsigned 
long)&ifr); - if (rc != 0) { + if (rc) { CERROR("Can't get netmask for interface %s\n", name); return rc; } @@ -157,15 +166,15 @@ lnet_ipif_enumerate(char ***namesp) nalloc = 16; /* first guess at max interfaces */ toobig = 0; for (;;) { - if (nalloc * sizeof(*ifr) > PAGE_CACHE_SIZE) { + if (nalloc * sizeof(*ifr) > PAGE_SIZE) { toobig = 1; - nalloc = PAGE_CACHE_SIZE/sizeof(*ifr); + nalloc = PAGE_SIZE / sizeof(*ifr); CWARN("Too many interfaces: only enumerating first %d\n", nalloc); } LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr)); - if (ifr == NULL) { + if (!ifr) { CERROR("ENOMEM enumerating up to %d interfaces\n", nalloc); rc = -ENOMEM; @@ -181,9 +190,9 @@ lnet_ipif_enumerate(char ***namesp) goto out1; } - LASSERT(rc == 0); + LASSERT(!rc); - nfound = ifc.ifc_len/sizeof(*ifr); + nfound = ifc.ifc_len / sizeof(*ifr); LASSERT(nfound <= nalloc); if (nfound < nalloc || toobig) @@ -193,11 +202,11 @@ lnet_ipif_enumerate(char ***namesp) nalloc *= 2; } - if (nfound == 0) + if (!nfound) goto out1; LIBCFS_ALLOC(names, nfound * sizeof(*names)); - if (names == NULL) { + if (!names) { rc = -ENOMEM; goto out1; } @@ -213,7 +222,7 @@ lnet_ipif_enumerate(char ***namesp) } LIBCFS_ALLOC(names[i], IFNAMSIZ); - if (names[i] == NULL) { + if (!names[i]) { rc = -ENOMEM; goto out2; } @@ -242,7 +251,7 @@ lnet_ipif_free_enumeration(char **names, int n) LASSERT(n > 0); - for (i = 0; i < n && names[i] != NULL; i++) + for (i = 0; i < n && names[i]; i++) LIBCFS_FREE(names[i], IFNAMSIZ); LIBCFS_FREE(names, n * sizeof(*names)); @@ -253,32 +262,30 @@ int lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout) { int rc; - long ticks = timeout * HZ; + long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC); unsigned long then; struct timeval tv; LASSERT(nob > 0); - /* Caller may pass a zero timeout if she thinks the socket buffer is - * empty enough to take the whole message immediately */ - + /* + * Caller may pass a zero timeout if she thinks the socket buffer is + * empty enough to take the whole message immediately + */ for (;;) { struct kvec iov = { .iov_base = buffer, .iov_len = nob }; struct msghdr msg = { - .msg_flags = (timeout == 0) ? MSG_DONTWAIT : 0 + .msg_flags = !timeout ? 
MSG_DONTWAIT : 0 }; - if (timeout != 0) { + if (timeout) { /* Set send timeout to remaining time */ - tv = (struct timeval) { - .tv_sec = ticks / HZ, - .tv_usec = ((ticks % HZ) * 1000000) / HZ - }; + jiffies_to_timeval(jiffies_left, &tv); rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&tv, sizeof(tv)); - if (rc != 0) { + if (rc) { CERROR("Can't set socket send timeout %ld.%06d: %d\n", (long)tv.tv_sec, (int)tv.tv_usec, rc); return rc; @@ -287,7 +294,7 @@ lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout) then = jiffies; rc = kernel_sendmsg(sock, &msg, &iov, 1, nob); - ticks -= jiffies - then; + jiffies_left -= jiffies - then; if (rc == nob) return 0; @@ -295,12 +302,12 @@ lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout) if (rc < 0) return rc; - if (rc == 0) { + if (!rc) { CERROR("Unexpected zero rc\n"); return -ECONNABORTED; } - if (ticks <= 0) + if (jiffies_left <= 0) return -EAGAIN; buffer = ((char *)buffer) + rc; @@ -314,12 +321,12 @@ int lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout) { int rc; - long ticks = timeout * HZ; + long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC); unsigned long then; struct timeval tv; LASSERT(nob > 0); - LASSERT(ticks > 0); + LASSERT(jiffies_left > 0); for (;;) { struct kvec iov = { @@ -331,13 +338,10 @@ lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout) }; /* Set receive timeout to remaining time */ - tv = (struct timeval) { - .tv_sec = ticks / HZ, - .tv_usec = ((ticks % HZ) * 1000000) / HZ - }; + jiffies_to_timeval(jiffies_left, &tv); rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&tv, sizeof(tv)); - if (rc != 0) { + if (rc) { CERROR("Can't set socket recv timeout %ld.%06d: %d\n", (long)tv.tv_sec, (int)tv.tv_usec, rc); return rc; @@ -345,21 +349,21 @@ lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout) then = jiffies; rc = kernel_recvmsg(sock, &msg, &iov, 1, nob, 0); - ticks -= jiffies - then; + jiffies_left -= jiffies - then; if (rc < 0) return rc; - if (rc == 0) + if (!rc) return -ECONNRESET; buffer = ((char *)buffer) + rc; nob -= rc; - if (nob == 0) + if (!nob) return 0; - if (ticks <= 0) + if (jiffies_left <= 0) return -ETIMEDOUT; } } @@ -379,7 +383,7 @@ lnet_sock_create(struct socket **sockp, int *fatal, __u32 local_ip, rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock); *sockp = sock; - if (rc != 0) { + if (rc) { CERROR("Can't create socket: %d\n", rc); return rc; } @@ -387,16 +391,16 @@ lnet_sock_create(struct socket **sockp, int *fatal, __u32 local_ip, option = 1; rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&option, sizeof(option)); - if (rc != 0) { + if (rc) { CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc); goto failed; } - if (local_ip != 0 || local_port != 0) { + if (local_ip || local_port) { memset(&locaddr, 0, sizeof(locaddr)); locaddr.sin_family = AF_INET; locaddr.sin_port = htons(local_port); - locaddr.sin_addr.s_addr = (local_ip == 0) ? + locaddr.sin_addr.s_addr = !local_ip ? 
INADDR_ANY : htonl(local_ip); rc = kernel_bind(sock, (struct sockaddr *)&locaddr, @@ -406,7 +410,7 @@ lnet_sock_create(struct socket **sockp, int *fatal, __u32 local_ip, *fatal = 0; goto failed; } - if (rc != 0) { + if (rc) { CERROR("Error trying to bind to port %d: %d\n", local_port, rc); goto failed; @@ -425,22 +429,22 @@ lnet_sock_setbuf(struct socket *sock, int txbufsize, int rxbufsize) int option; int rc; - if (txbufsize != 0) { + if (txbufsize) { option = txbufsize; rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUF, (char *)&option, sizeof(option)); - if (rc != 0) { + if (rc) { CERROR("Can't set send buffer %d: %d\n", option, rc); return rc; } } - if (rxbufsize != 0) { + if (rxbufsize) { option = rxbufsize; rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF, - (char *)&option, sizeof(option)); - if (rc != 0) { + (char *)&option, sizeof(option)); + if (rc) { CERROR("Can't set receive buffer %d: %d\n", option, rc); return rc; @@ -461,16 +465,16 @@ lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port) rc = kernel_getpeername(sock, (struct sockaddr *)&sin, &len); else rc = kernel_getsockname(sock, (struct sockaddr *)&sin, &len); - if (rc != 0) { + if (rc) { CERROR("Error %d getting sock %s IP/port\n", rc, remote ? "peer" : "local"); return rc; } - if (ip != NULL) + if (ip) *ip = ntohl(sin.sin_addr.s_addr); - if (port != NULL) + if (port) *port = ntohs(sin.sin_port); return 0; @@ -480,10 +484,10 @@ EXPORT_SYMBOL(lnet_sock_getaddr); int lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize) { - if (txbufsize != NULL) + if (txbufsize) *txbufsize = sock->sk->sk_sndbuf; - if (rxbufsize != NULL) + if (rxbufsize) *rxbufsize = sock->sk->sk_rcvbuf; return 0; @@ -498,7 +502,7 @@ lnet_sock_listen(struct socket **sockp, __u32 local_ip, int local_port, int rc; rc = lnet_sock_create(sockp, &fatal, local_ip, local_port); - if (rc != 0) { + if (rc) { if (!fatal) CERROR("Can't create socket: port %d already in use\n", local_port); @@ -506,14 +510,13 @@ lnet_sock_listen(struct socket **sockp, __u32 local_ip, int local_port, } rc = kernel_listen(*sockp, backlog); - if (rc == 0) + if (!rc) return 0; CERROR("Can't set listen backlog %d: %d\n", backlog, rc); sock_release(*sockp); return rc; } -EXPORT_SYMBOL(lnet_sock_listen); int lnet_sock_accept(struct socket **newsockp, struct socket *sock) @@ -522,10 +525,10 @@ lnet_sock_accept(struct socket **newsockp, struct socket *sock) struct socket *newsock; int rc; - init_waitqueue_entry(&wait, current); - - /* XXX this should add a ref to sock->ops->owner, if - * TCP could be a module */ + /* + * XXX this should add a ref to sock->ops->owner, if + * TCP could be a module + */ rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock); if (rc) { CERROR("Can't allocate socket\n"); @@ -537,15 +540,15 @@ lnet_sock_accept(struct socket **newsockp, struct socket *sock) rc = sock->ops->accept(sock, newsock, O_NONBLOCK); if (rc == -EAGAIN) { /* Nothing ready, so wait for activity */ - set_current_state(TASK_INTERRUPTIBLE); + init_waitqueue_entry(&wait, current); add_wait_queue(sk_sleep(sock->sk), &wait); + set_current_state(TASK_INTERRUPTIBLE); schedule(); remove_wait_queue(sk_sleep(sock->sk), &wait); - set_current_state(TASK_RUNNING); rc = sock->ops->accept(sock, newsock, O_NONBLOCK); } - if (rc != 0) + if (rc) goto failed; *newsockp = newsock; @@ -555,7 +558,6 @@ failed: sock_release(newsock); return rc; } -EXPORT_SYMBOL(lnet_sock_accept); int lnet_sock_connect(struct socket **sockp, int *fatal, __u32 local_ip, @@ -565,7 
+567,7 @@ lnet_sock_connect(struct socket **sockp, int *fatal, __u32 local_ip, int rc; rc = lnet_sock_create(sockp, fatal, local_ip, local_port); - if (rc != 0) + if (rc) return rc; memset(&srvaddr, 0, sizeof(srvaddr)); @@ -575,13 +577,15 @@ lnet_sock_connect(struct socket **sockp, int *fatal, __u32 local_ip, rc = kernel_connect(*sockp, (struct sockaddr *)&srvaddr, sizeof(srvaddr), 0); - if (rc == 0) + if (!rc) return 0; - /* EADDRNOTAVAIL probably means we're already connected to the same + /* + * EADDRNOTAVAIL probably means we're already connected to the same * peer/port on the same local port on a differently typed * connection. Let our caller retry with a different local - * port... */ + * port... + */ *fatal = !(rc == -EADDRNOTAVAIL); CDEBUG_LIMIT(*fatal ? D_NETERROR : D_NET, @@ -591,4 +595,3 @@ lnet_sock_connect(struct socket **sockp, int *fatal, __u32 local_ip, sock_release(*sockp); return rc; } -EXPORT_SYMBOL(lnet_sock_connect); diff --git a/drivers/staging/lustre/lnet/lnet/lo.c b/drivers/staging/lustre/lnet/lnet/lo.c index 2a137f468..468eda611 100644 --- a/drivers/staging/lustre/lnet/lnet/lo.c +++ b/drivers/staging/lustre/lnet/lnet/lo.c @@ -46,15 +46,15 @@ lolnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) static int lolnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct kvec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) + int delayed, unsigned int niov, + struct kvec *iov, lnet_kiov_t *kiov, + unsigned int offset, unsigned int mlen, unsigned int rlen) { lnet_msg_t *sendmsg = private; - if (lntmsg != NULL) { /* not discarding */ - if (sendmsg->msg_iov != NULL) { - if (iov != NULL) + if (lntmsg) { /* not discarding */ + if (sendmsg->msg_iov) { + if (iov) lnet_copy_iov2iov(niov, iov, offset, sendmsg->msg_niov, sendmsg->msg_iov, @@ -65,7 +65,7 @@ lolnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, sendmsg->msg_iov, sendmsg->msg_offset, mlen); } else { - if (iov != NULL) + if (iov) lnet_copy_kiov2iov(niov, iov, offset, sendmsg->msg_niov, sendmsg->msg_kiov, diff --git a/drivers/staging/lustre/lnet/lnet/module.c b/drivers/staging/lustre/lnet/lnet/module.c index c93c00752..93037c116 100644 --- a/drivers/staging/lustre/lnet/lnet/module.c +++ b/drivers/staging/lustre/lnet/lnet/module.c @@ -36,6 +36,7 @@ #define DEBUG_SUBSYSTEM S_LNET #include "../../include/linux/lnet/lib-lnet.h" +#include "../../include/linux/lnet/lib-dlc.h" static int config_on_load; module_param(config_on_load, int, 0444); @@ -52,13 +53,21 @@ lnet_configure(void *arg) mutex_lock(&lnet_config_mutex); if (!the_lnet.ln_niinit_self) { - rc = LNetNIInit(LUSTRE_SRV_LNET_PID); + rc = try_module_get(THIS_MODULE); + + if (rc != 1) + goto out; + + rc = LNetNIInit(LNET_PID_LUSTRE); if (rc >= 0) { the_lnet.ln_niinit_self = 1; rc = 0; + } else { + module_put(THIS_MODULE); } } +out: mutex_unlock(&lnet_config_mutex); return rc; } @@ -73,6 +82,7 @@ lnet_unconfigure(void) if (the_lnet.ln_niinit_self) { the_lnet.ln_niinit_self = 0; LNetNIFini(); + module_put(THIS_MODULE); } mutex_lock(&the_lnet.ln_api_mutex); @@ -80,28 +90,93 @@ lnet_unconfigure(void) mutex_unlock(&the_lnet.ln_api_mutex); mutex_unlock(&lnet_config_mutex); - return (refcount == 0) ? 0 : -EBUSY; + return !refcount ? 
0 : -EBUSY; } static int -lnet_ioctl(unsigned int cmd, struct libcfs_ioctl_data *data) +lnet_dyn_configure(struct libcfs_ioctl_hdr *hdr) +{ + struct lnet_ioctl_config_data *conf = + (struct lnet_ioctl_config_data *)hdr; + int rc; + + if (conf->cfg_hdr.ioc_len < sizeof(*conf)) + return -EINVAL; + + mutex_lock(&lnet_config_mutex); + if (!the_lnet.ln_niinit_self) { + rc = -EINVAL; + goto out_unlock; + } + rc = lnet_dyn_add_ni(LNET_PID_LUSTRE, + conf->cfg_config_u.cfg_net.net_intf, + conf->cfg_config_u.cfg_net.net_peer_timeout, + conf->cfg_config_u.cfg_net.net_peer_tx_credits, + conf->cfg_config_u.cfg_net.net_peer_rtr_credits, + conf->cfg_config_u.cfg_net.net_max_tx_credits); +out_unlock: + mutex_unlock(&lnet_config_mutex); + + return rc; +} + +static int +lnet_dyn_unconfigure(struct libcfs_ioctl_hdr *hdr) +{ + struct lnet_ioctl_config_data *conf = + (struct lnet_ioctl_config_data *)hdr; + int rc; + + if (conf->cfg_hdr.ioc_len < sizeof(*conf)) + return -EINVAL; + + mutex_lock(&lnet_config_mutex); + if (!the_lnet.ln_niinit_self) { + rc = -EINVAL; + goto out_unlock; + } + rc = lnet_dyn_del_ni(conf->cfg_net); +out_unlock: + mutex_unlock(&lnet_config_mutex); + + return rc; +} + +static int +lnet_ioctl(unsigned int cmd, struct libcfs_ioctl_hdr *hdr) { int rc; switch (cmd) { - case IOC_LIBCFS_CONFIGURE: + case IOC_LIBCFS_CONFIGURE: { + struct libcfs_ioctl_data *data = + (struct libcfs_ioctl_data *)hdr; + + if (data->ioc_hdr.ioc_len < sizeof(*data)) + return -EINVAL; + + the_lnet.ln_nis_from_mod_params = data->ioc_flags; return lnet_configure(NULL); + } case IOC_LIBCFS_UNCONFIGURE: return lnet_unconfigure(); + case IOC_LIBCFS_ADD_NET: + return lnet_dyn_configure(hdr); + + case IOC_LIBCFS_DEL_NET: + return lnet_dyn_unconfigure(hdr); + default: - /* Passing LNET_PID_ANY only gives me a ref if the net is up + /* + * Passing LNET_PID_ANY only gives me a ref if the net is up * already; I'll need it to ensure the net can't go down while - * I'm called into it */ + * I'm called into it + */ rc = LNetNIInit(LNET_PID_ANY); if (rc >= 0) { - rc = LNetCtl(cmd, data); + rc = LNetCtl(cmd, hdr); LNetNIFini(); } return rc; @@ -110,46 +185,46 @@ lnet_ioctl(unsigned int cmd, struct libcfs_ioctl_data *data) static DECLARE_IOCTL_HANDLER(lnet_ioctl_handler, lnet_ioctl); -static int __init -init_lnet(void) +static int __init lnet_init(void) { int rc; mutex_init(&lnet_config_mutex); - rc = lnet_init(); - if (rc != 0) { - CERROR("lnet_init: error %d\n", rc); + rc = lnet_lib_init(); + if (rc) { + CERROR("lnet_lib_init: error %d\n", rc); return rc; } rc = libcfs_register_ioctl(&lnet_ioctl_handler); - LASSERT(rc == 0); + LASSERT(!rc); if (config_on_load) { - /* Have to schedule a separate thread to avoid deadlocking - * in modload */ + /* + * Have to schedule a separate thread to avoid deadlocking + * in modload + */ (void) kthread_run(lnet_configure, NULL, "lnet_initd"); } return 0; } -static void __exit -fini_lnet(void) +static void __exit lnet_exit(void) { int rc; rc = libcfs_deregister_ioctl(&lnet_ioctl_handler); - LASSERT(rc == 0); + LASSERT(!rc); - lnet_fini(); + lnet_lib_exit(); } MODULE_AUTHOR("OpenSFS, Inc. 
<http://www.lustre.org/>"); -MODULE_DESCRIPTION("LNet v3.1"); +MODULE_DESCRIPTION("Lustre Networking layer"); +MODULE_VERSION(LNET_VERSION); MODULE_LICENSE("GPL"); -MODULE_VERSION("1.0.0"); -module_init(init_lnet); -module_exit(fini_lnet); +module_init(lnet_init); +module_exit(lnet_exit); diff --git a/drivers/staging/lustre/lnet/lnet/net_fault.c b/drivers/staging/lustre/lnet/lnet/net_fault.c new file mode 100644 index 000000000..7d76f28d3 --- /dev/null +++ b/drivers/staging/lustre/lnet/lnet/net_fault.c @@ -0,0 +1,1025 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2014, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Seagate, Inc. + * + * lnet/lnet/net_fault.c + * + * Lustre network fault simulation + * + * Author: liang.zhen@intel.com + */ + +#define DEBUG_SUBSYSTEM S_LNET + +#include "../../include/linux/lnet/lib-lnet.h" +#include "../../include/linux/lnet/lnetctl.h" + +#define LNET_MSG_MASK (LNET_PUT_BIT | LNET_ACK_BIT | \ + LNET_GET_BIT | LNET_REPLY_BIT) + +struct lnet_drop_rule { + /** link chain on the_lnet.ln_drop_rules */ + struct list_head dr_link; + /** attributes of this rule */ + struct lnet_fault_attr dr_attr; + /** lock to protect \a dr_drop_at and \a dr_stat */ + spinlock_t dr_lock; + /** + * the message sequence to drop, which means message is dropped when + * dr_stat.fs_count == dr_drop_at + */ + unsigned long dr_drop_at; + /** + * seconds to drop the next message, it's exclusive with dr_drop_at + */ + unsigned long dr_drop_time; + /** baseline to calculate dr_drop_time */ + unsigned long dr_time_base; + /** statistics of dropped messages */ + struct lnet_fault_stat dr_stat; +}; + +static bool +lnet_fault_nid_match(lnet_nid_t nid, lnet_nid_t msg_nid) +{ + if (nid == msg_nid || nid == LNET_NID_ANY) + return true; + + if (LNET_NIDNET(nid) != LNET_NIDNET(msg_nid)) + return false; + + /* 255.255.255.255@net is wildcard for all addresses in a network */ + return LNET_NIDADDR(nid) == LNET_NIDADDR(LNET_NID_ANY); +} + +static bool +lnet_fault_attr_match(struct lnet_fault_attr *attr, lnet_nid_t src, + lnet_nid_t dst, unsigned int type, unsigned int portal) +{ + if (!lnet_fault_nid_match(attr->fa_src, src) || + !lnet_fault_nid_match(attr->fa_dst, dst)) + return false; + + if (!(attr->fa_msg_mask & (1 << type))) + return false; + + /** + * NB: ACK and REPLY have no portal, but they should have been + * rejected by message mask + */ + if (attr->fa_ptl_mask && /* has portal filter */ + !(attr->fa_ptl_mask & (1ULL << portal))) + return false; + + return true; +}
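To make the wildcard matching above concrete, here is a minimal sketch of the three kinds of fa_src/fa_dst values that lnet_fault_nid_match() accepts; the NID strings and the tcp1 network number are invented for illustration:

/* Illustrative sketch only, not part of the patch. */
static bool example_nid_wildcards(void)
{
	lnet_nid_t msg_src = libcfs_str2nid("192.168.0.7@tcp1");
	bool exact, any, net_wild;

	/* exact NID match */
	exact = lnet_fault_nid_match(libcfs_str2nid("192.168.0.7@tcp1"),
				     msg_src);
	/* LNET_NID_ANY matches every NID */
	any = lnet_fault_nid_match(LNET_NID_ANY, msg_src);
	/* 255.255.255.255@tcp1 matches any address on network tcp1 */
	net_wild = lnet_fault_nid_match(libcfs_str2nid("255.255.255.255@tcp1"),
					msg_src);

	return exact && any && net_wild;
}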
+ +static int +lnet_fault_attr_validate(struct lnet_fault_attr *attr) +{ + if (!attr->fa_msg_mask) + attr->fa_msg_mask = LNET_MSG_MASK; /* all message types */ + + if (!attr->fa_ptl_mask) /* no portal filter */ + return 0; + + /* NB: only PUT and GET can be filtered if portal filter has been set */ + attr->fa_msg_mask &= LNET_GET_BIT | LNET_PUT_BIT; + if (!attr->fa_msg_mask) { + CDEBUG(D_NET, "can't find valid message type bits %x\n", + attr->fa_msg_mask); + return -EINVAL; + } + return 0; +} + +static void +lnet_fault_stat_inc(struct lnet_fault_stat *stat, unsigned int type) +{ + /* NB: fs_count is NOT updated by this function */ + switch (type) { + case LNET_MSG_PUT: + stat->fs_put++; + return; + case LNET_MSG_ACK: + stat->fs_ack++; + return; + case LNET_MSG_GET: + stat->fs_get++; + return; + case LNET_MSG_REPLY: + stat->fs_reply++; + return; + } +} + +/** + * LNet message drop simulation + */ + +/** + * Add a new drop rule to LNet. + * There is no check for duplicate drop rules; all rules will be checked for + * each incoming message. + */ +static int +lnet_drop_rule_add(struct lnet_fault_attr *attr) +{ + struct lnet_drop_rule *rule; + + if (attr->u.drop.da_rate & attr->u.drop.da_interval) { + CDEBUG(D_NET, "please provide either drop rate or drop interval, but not both at the same time %d/%d\n", + attr->u.drop.da_rate, attr->u.drop.da_interval); + return -EINVAL; + } + + if (lnet_fault_attr_validate(attr)) + return -EINVAL; + + CFS_ALLOC_PTR(rule); + if (!rule) + return -ENOMEM; + + spin_lock_init(&rule->dr_lock); + + rule->dr_attr = *attr; + if (attr->u.drop.da_interval) { + rule->dr_time_base = cfs_time_shift(attr->u.drop.da_interval); + rule->dr_drop_time = cfs_time_shift(cfs_rand() % + attr->u.drop.da_interval); + } else { + rule->dr_drop_at = cfs_rand() % attr->u.drop.da_rate; + } + + lnet_net_lock(LNET_LOCK_EX); + list_add(&rule->dr_link, &the_lnet.ln_drop_rules); + lnet_net_unlock(LNET_LOCK_EX); + + CDEBUG(D_NET, "Added drop rule: src %s, dst %s, rate %d, interval %d\n", + libcfs_nid2str(attr->fa_src), libcfs_nid2str(attr->fa_dst), + attr->u.drop.da_rate, attr->u.drop.da_interval); + return 0; +}
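As a hedged usage sketch for the add path above: lnet_drop_rule_add() itself is static, so user-visible configuration arrives through the LNET_CTL_DROP_ADD ioctl handled by lnet_fault_ctl() later in this patch. The NIDs below are invented, and the direct lnet_fault_ctl() call stands in for the lctl ioctl plumbing:

/* Illustrative sketch only: drop roughly 1 of every 100 PUTs/GETs
 * between two made-up NIDs. Exactly one of da_rate and da_interval
 * may be non-zero, as lnet_drop_rule_add() checks above.
 */
static int example_add_drop_rule(void)
{
	static struct lnet_fault_attr attr;
	struct libcfs_ioctl_data data = { };

	attr.fa_src = libcfs_str2nid("10.10.0.1@tcp");
	attr.fa_dst = libcfs_str2nid("10.10.0.2@tcp");
	attr.fa_msg_mask = LNET_PUT_BIT | LNET_GET_BIT;
	attr.u.drop.da_rate = 100;	/* rate based: 1 in 100 */
	attr.u.drop.da_interval = 0;	/* exclusive with da_rate */

	data.ioc_inlbuf1 = (char *)&attr;
	return lnet_fault_ctl(LNET_CTL_DROP_ADD, &data);
}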
+ +/** + * Remove matched drop rules from LNet; all rules that can match \a src and + * \a dst will be removed. + * If \a src is zero, then all rules that have \a dst as destination will be removed. + * If \a dst is zero, then all rules that have \a src as source will be removed. + * If both of them are zero, all rules will be removed. + */ +static int +lnet_drop_rule_del(lnet_nid_t src, lnet_nid_t dst) +{ + struct lnet_drop_rule *rule; + struct lnet_drop_rule *tmp; + struct list_head zombies; + int n = 0; + + INIT_LIST_HEAD(&zombies); + + lnet_net_lock(LNET_LOCK_EX); + list_for_each_entry_safe(rule, tmp, &the_lnet.ln_drop_rules, dr_link) { + if (rule->dr_attr.fa_src != src && src) + continue; + + if (rule->dr_attr.fa_dst != dst && dst) + continue; + + list_move(&rule->dr_link, &zombies); + } + lnet_net_unlock(LNET_LOCK_EX); + + list_for_each_entry_safe(rule, tmp, &zombies, dr_link) { + CDEBUG(D_NET, "Remove drop rule: src %s->dst: %s (1/%d, %d)\n", + libcfs_nid2str(rule->dr_attr.fa_src), + libcfs_nid2str(rule->dr_attr.fa_dst), + rule->dr_attr.u.drop.da_rate, + rule->dr_attr.u.drop.da_interval); + + list_del(&rule->dr_link); + CFS_FREE_PTR(rule); + n++; + } + + return n; +} + +/** + * List the drop rule at position \a pos + */ +static int +lnet_drop_rule_list(int pos, struct lnet_fault_attr *attr, + struct lnet_fault_stat *stat) +{ + struct lnet_drop_rule *rule; + int cpt; + int i = 0; + int rc = -ENOENT; + + cpt = lnet_net_lock_current(); + list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) { + if (i++ < pos) + continue; + + spin_lock(&rule->dr_lock); + *attr = rule->dr_attr; + *stat = rule->dr_stat; + spin_unlock(&rule->dr_lock); + rc = 0; + break; + } + + lnet_net_unlock(cpt); + return rc; +} + +/** + * reset counters for all drop rules + */ +static void +lnet_drop_rule_reset(void) +{ + struct lnet_drop_rule *rule; + int cpt; + + cpt = lnet_net_lock_current(); + + list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) { + struct lnet_fault_attr *attr = &rule->dr_attr; + + spin_lock(&rule->dr_lock); + + memset(&rule->dr_stat, 0, sizeof(rule->dr_stat)); + if (attr->u.drop.da_rate) { + rule->dr_drop_at = cfs_rand() % attr->u.drop.da_rate; + } else { + rule->dr_drop_time = cfs_time_shift(cfs_rand() % + attr->u.drop.da_interval); + rule->dr_time_base = cfs_time_shift(attr->u.drop.da_interval); + } + spin_unlock(&rule->dr_lock); + } + + lnet_net_unlock(cpt); +} + +/** + * check source/destination NID, portal, message type and drop rate, + * decide whether this message should be dropped or not + */ +static bool +drop_rule_match(struct lnet_drop_rule *rule, lnet_nid_t src, + lnet_nid_t dst, unsigned int type, unsigned int portal) +{ + struct lnet_fault_attr *attr = &rule->dr_attr; + bool drop; + + if (!lnet_fault_attr_match(attr, src, dst, type, portal)) + return false; + + /* match this rule, check drop rate now */ + spin_lock(&rule->dr_lock); + if (rule->dr_drop_time) { /* time based drop */ + unsigned long now = cfs_time_current(); + + rule->dr_stat.fs_count++; + drop = cfs_time_aftereq(now, rule->dr_drop_time); + if (drop) { + if (cfs_time_after(now, rule->dr_time_base)) + rule->dr_time_base = now; + + rule->dr_drop_time = rule->dr_time_base + + cfs_time_seconds(cfs_rand() % + attr->u.drop.da_interval); + rule->dr_time_base += cfs_time_seconds(attr->u.drop.da_interval); + + CDEBUG(D_NET, "Drop Rule %s->%s: next drop : %lu\n", + libcfs_nid2str(attr->fa_src), + libcfs_nid2str(attr->fa_dst), + rule->dr_drop_time); + } + + } else { /* rate based drop */ + drop = rule->dr_stat.fs_count++ == rule->dr_drop_at; + + if (!do_div(rule->dr_stat.fs_count, attr->u.drop.da_rate)) { + rule->dr_drop_at =
rule->dr_stat.fs_count + + cfs_rand() % attr->u.drop.da_rate; + CDEBUG(D_NET, "Drop Rule %s->%s: next drop: %lu\n", + libcfs_nid2str(attr->fa_src), + libcfs_nid2str(attr->fa_dst), rule->dr_drop_at); + } + } + + if (drop) { /* drop this message, update counters */ + lnet_fault_stat_inc(&rule->dr_stat, type); + rule->dr_stat.u.drop.ds_dropped++; + } + + spin_unlock(&rule->dr_lock); + return drop; +} + +/** + * Check if message from \a src to \a dst can match any existing drop rule + */ +bool +lnet_drop_rule_match(lnet_hdr_t *hdr) +{ + struct lnet_drop_rule *rule; + lnet_nid_t src = le64_to_cpu(hdr->src_nid); + lnet_nid_t dst = le64_to_cpu(hdr->dest_nid); + unsigned int typ = le32_to_cpu(hdr->type); + unsigned int ptl = -1; + bool drop = false; + int cpt; + + /** + * NB: if Portal is specified, then only PUT and GET will be + * filtered by drop rule + */ + if (typ == LNET_MSG_PUT) + ptl = le32_to_cpu(hdr->msg.put.ptl_index); + else if (typ == LNET_MSG_GET) + ptl = le32_to_cpu(hdr->msg.get.ptl_index); + + cpt = lnet_net_lock_current(); + list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) { + drop = drop_rule_match(rule, src, dst, typ, ptl); + if (drop) + break; + } + + lnet_net_unlock(cpt); + return drop; +} + +/** + * LNet Delay Simulation + */ +/** timestamp (in seconds) to send delayed message */ +#define msg_delay_send msg_ev.hdr_data + +struct lnet_delay_rule { + /** link chain on the_lnet.ln_delay_rules */ + struct list_head dl_link; + /** link chain on delay_dd.dd_sched_rules */ + struct list_head dl_sched_link; + /** attributes of this rule */ + struct lnet_fault_attr dl_attr; + /** lock to protect the members below */ + spinlock_t dl_lock; + /** refcount of delay rule */ + atomic_t dl_refcount; + /** + * the message sequence to delay, which means message is delayed when + * dl_stat.fs_count == dl_delay_at + */ + unsigned long dl_delay_at; + /** + * seconds to delay the next message, it's exclusive with dl_delay_at + */ + unsigned long dl_delay_time; + /** baseline to calculate dl_delay_time */ + unsigned long dl_time_base; + /** jiffies to send the next delayed message */ + unsigned long dl_msg_send; + /** delayed message list */ + struct list_head dl_msg_list; + /** statistics of delayed messages */ + struct lnet_fault_stat dl_stat; + /** timer to wake up delay_daemon */ + struct timer_list dl_timer; +}; + +struct delay_daemon_data { + /** serialise rule add/remove */ + struct mutex dd_mutex; + /** protect rules on \a dd_sched_rules */ + spinlock_t dd_lock; + /** scheduled delay rules (by timer) */ + struct list_head dd_sched_rules; + /** daemon thread sleeps here */ + wait_queue_head_t dd_waitq; + /** controller (lctl command) waits here */ + wait_queue_head_t dd_ctl_waitq; + /** daemon is running */ + unsigned int dd_running; + /** daemon stopped */ + unsigned int dd_stopped; +}; + +static struct delay_daemon_data delay_dd; + +static unsigned long +round_timeout(unsigned long timeout) +{ + return cfs_time_seconds((unsigned int) + cfs_duration_sec(cfs_time_sub(timeout, 0)) + 1); +} + +static void +delay_rule_decref(struct lnet_delay_rule *rule) +{ + if (atomic_dec_and_test(&rule->dl_refcount)) { + LASSERT(list_empty(&rule->dl_sched_link)); + LASSERT(list_empty(&rule->dl_msg_list)); + LASSERT(list_empty(&rule->dl_link)); + + CFS_FREE_PTR(rule); + } +}
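The dl_timer field above is driven with the old-style kernel timer API that this kernel generation still uses (init_timer() plus a function/data pair, before timer_setup() existed). A self-contained sketch of that idiom, with invented names:

#include <linux/timer.h>

struct example_rule {
	struct timer_list tmr;
};

static void example_timer_cb(unsigned long arg)
{
	struct example_rule *rule = (struct example_rule *)arg;

	/* runs in softirq context: only schedule work here, never sleep */
	(void)rule;
}

static void example_arm_timer(struct example_rule *rule, unsigned long expiry)
{
	init_timer(&rule->tmr);
	rule->tmr.function = example_timer_cb;
	rule->tmr.data = (unsigned long)rule;
	mod_timer(&rule->tmr, expiry);	/* expiry is an absolute jiffies value */
}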
+ +/** + * check source/destination NID, portal, message type and delay rate, + * decide whether this message should be delayed or not + */ +static bool +delay_rule_match(struct lnet_delay_rule *rule, lnet_nid_t src, + lnet_nid_t dst, unsigned int type, unsigned int portal, + struct lnet_msg *msg) +{ + struct lnet_fault_attr *attr = &rule->dl_attr; + bool delay; + + if (!lnet_fault_attr_match(attr, src, dst, type, portal)) + return false; + + /* match this rule, check delay rate now */ + spin_lock(&rule->dl_lock); + if (rule->dl_delay_time) { /* time based delay */ + unsigned long now = cfs_time_current(); + + rule->dl_stat.fs_count++; + delay = cfs_time_aftereq(now, rule->dl_delay_time); + if (delay) { + if (cfs_time_after(now, rule->dl_time_base)) + rule->dl_time_base = now; + + rule->dl_delay_time = rule->dl_time_base + + cfs_time_seconds(cfs_rand() % + attr->u.delay.la_interval); + rule->dl_time_base += cfs_time_seconds(attr->u.delay.la_interval); + + CDEBUG(D_NET, "Delay Rule %s->%s: next delay : %lu\n", + libcfs_nid2str(attr->fa_src), + libcfs_nid2str(attr->fa_dst), + rule->dl_delay_time); + } + + } else { /* rate based delay */ + delay = rule->dl_stat.fs_count++ == rule->dl_delay_at; + /* generate the next random rate sequence */ + if (!do_div(rule->dl_stat.fs_count, attr->u.delay.la_rate)) { + rule->dl_delay_at = rule->dl_stat.fs_count + + cfs_rand() % attr->u.delay.la_rate; + CDEBUG(D_NET, "Delay Rule %s->%s: next delay: %lu\n", + libcfs_nid2str(attr->fa_src), + libcfs_nid2str(attr->fa_dst), rule->dl_delay_at); + } + } + + if (!delay) { + spin_unlock(&rule->dl_lock); + return false; + } + + /* delay this message, update counters */ + lnet_fault_stat_inc(&rule->dl_stat, type); + rule->dl_stat.u.delay.ls_delayed++; + + list_add_tail(&msg->msg_list, &rule->dl_msg_list); + msg->msg_delay_send = round_timeout( + cfs_time_shift(attr->u.delay.la_latency)); + if (rule->dl_msg_send == -1) { + rule->dl_msg_send = msg->msg_delay_send; + mod_timer(&rule->dl_timer, rule->dl_msg_send); + } + + spin_unlock(&rule->dl_lock); + return true; +}
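Both drop and delay rules share the rate-based victim selection seen above: one message is chosen at random out of every window of da_rate (or la_rate) messages. A standalone user-space sketch of that selection, with invented numbers (seeding of rand() omitted for brevity):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned long rate = 8;			/* one victim per 8 messages */
	unsigned long count = 0;
	unsigned long pick_at = rand() % rate;	/* first victim in [0, rate) */
	unsigned long seq;

	for (seq = 0; seq < 32; seq++) {
		int hit = (count++ == pick_at);

		if (count % rate == 0)		/* window done: pick the next */
			pick_at = count + rand() % rate;
		if (hit)
			printf("message %lu selected\n", seq);
	}
	return 0;
}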
+ +/** + * Check if \a msg can match any delay rule; if there is a match, + * receiving of this message will be delayed. + */ +bool +lnet_delay_rule_match_locked(lnet_hdr_t *hdr, struct lnet_msg *msg) +{ + struct lnet_delay_rule *rule; + lnet_nid_t src = le64_to_cpu(hdr->src_nid); + lnet_nid_t dst = le64_to_cpu(hdr->dest_nid); + unsigned int typ = le32_to_cpu(hdr->type); + unsigned int ptl = -1; + + /* NB: called with lnet_net_lock held */ + + /** + * NB: if Portal is specified, then only PUT and GET will be + * filtered by delay rule + */ + if (typ == LNET_MSG_PUT) + ptl = le32_to_cpu(hdr->msg.put.ptl_index); + else if (typ == LNET_MSG_GET) + ptl = le32_to_cpu(hdr->msg.get.ptl_index); + + list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) { + if (delay_rule_match(rule, src, dst, typ, ptl, msg)) + return true; + } + + return false; +} + +/** check for delayed messages that are ready to send */ +static void +delayed_msg_check(struct lnet_delay_rule *rule, bool all, + struct list_head *msg_list) +{ + struct lnet_msg *msg; + struct lnet_msg *tmp; + unsigned long now = cfs_time_current(); + + if (!all && rule->dl_msg_send > now) + return; + + spin_lock(&rule->dl_lock); + list_for_each_entry_safe(msg, tmp, &rule->dl_msg_list, msg_list) { + if (!all && msg->msg_delay_send > now) + break; + + msg->msg_delay_send = 0; + list_move_tail(&msg->msg_list, msg_list); + } + + if (list_empty(&rule->dl_msg_list)) { + del_timer(&rule->dl_timer); + rule->dl_msg_send = -1; + + } else if (!list_empty(msg_list)) { + /* + * dequeued some timed-out messages, update timer for the + * next delayed message on rule + */ + msg = list_entry(rule->dl_msg_list.next, + struct lnet_msg, msg_list); + rule->dl_msg_send = msg->msg_delay_send; + mod_timer(&rule->dl_timer, rule->dl_msg_send); + } + spin_unlock(&rule->dl_lock); +} + +static void +delayed_msg_process(struct list_head *msg_list, bool drop) +{ + struct lnet_msg *msg; + + while (!list_empty(msg_list)) { + struct lnet_ni *ni; + int cpt; + int rc; + + msg = list_entry(msg_list->next, struct lnet_msg, msg_list); + LASSERT(msg->msg_rxpeer); + + ni = msg->msg_rxpeer->lp_ni; + cpt = msg->msg_rx_cpt; + + list_del_init(&msg->msg_list); + if (drop) { + rc = -ECANCELED; + + } else if (!msg->msg_routing) { + rc = lnet_parse_local(ni, msg); + if (!rc) + continue; + + } else { + lnet_net_lock(cpt); + rc = lnet_parse_forward_locked(ni, msg); + lnet_net_unlock(cpt); + + switch (rc) { + case LNET_CREDIT_OK: + lnet_ni_recv(ni, msg->msg_private, msg, 0, + 0, msg->msg_len, msg->msg_len); + case LNET_CREDIT_WAIT: + continue; + default: /* failures */ + break; + } + } + + lnet_drop_message(ni, cpt, msg->msg_private, msg->msg_len); + lnet_finalize(ni, msg, rc); + } +} + +/** + * Process delayed messages for scheduled rules. + * This function can be called either by delay_rule_daemon or by lnet_finalize. + */ +void +lnet_delay_rule_check(void) +{ + struct lnet_delay_rule *rule; + struct list_head msgs; + + INIT_LIST_HEAD(&msgs); + while (1) { + if (list_empty(&delay_dd.dd_sched_rules)) + break; + + spin_lock_bh(&delay_dd.dd_lock); + if (list_empty(&delay_dd.dd_sched_rules)) { + spin_unlock_bh(&delay_dd.dd_lock); + break; + } + + rule = list_entry(delay_dd.dd_sched_rules.next, + struct lnet_delay_rule, dl_sched_link); + list_del_init(&rule->dl_sched_link); + spin_unlock_bh(&delay_dd.dd_lock); + + delayed_msg_check(rule, false, &msgs); + delay_rule_decref(rule); /* -1 for delay_dd.dd_sched_rules */ + } + + if (!list_empty(&msgs)) + delayed_msg_process(&msgs, false); +}
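The daemon that follows uses a two-way handshake on dd_ctl_waitq: the creator waits until the thread reports dd_running, and the stopper waits for dd_stopped. A condensed, self-contained sketch of the same pattern (all names are invented; work_todo() is a stub):

#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/err.h>

static struct {
	wait_queue_head_t waitq;	/* daemon sleeps here */
	wait_queue_head_t ctl_waitq;	/* starter/stopper sleeps here */
	unsigned int running;
	unsigned int stopped;
} edd;

static bool work_todo(void) { return false; }	/* stub */

static int example_daemon(void *arg)
{
	edd.running = 1;
	wake_up(&edd.ctl_waitq);		/* creator may proceed */
	while (edd.running)
		wait_event_interruptible(edd.waitq,
					 !edd.running || work_todo());
	edd.stopped = 1;
	wake_up(&edd.ctl_waitq);		/* stopper may proceed */
	return 0;
}

static int example_start(void)
{
	struct task_struct *task;

	init_waitqueue_head(&edd.waitq);
	init_waitqueue_head(&edd.ctl_waitq);
	task = kthread_run(example_daemon, NULL, "example_dd");
	if (IS_ERR(task))
		return PTR_ERR(task);
	wait_event(edd.ctl_waitq, edd.running);	/* as lnet_delay_rule_add() does */
	return 0;
}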
+ +/** daemon thread to handle delayed messages */ +static int +lnet_delay_rule_daemon(void *arg) +{ + delay_dd.dd_running = 1; + wake_up(&delay_dd.dd_ctl_waitq); + + while (delay_dd.dd_running) { + wait_event_interruptible(delay_dd.dd_waitq, + !delay_dd.dd_running || + !list_empty(&delay_dd.dd_sched_rules)); + lnet_delay_rule_check(); + } + + /* in case more rules have been enqueued after my last check */ + lnet_delay_rule_check(); + delay_dd.dd_stopped = 1; + wake_up(&delay_dd.dd_ctl_waitq); + + return 0; +} + +static void +delay_timer_cb(unsigned long arg) +{ + struct lnet_delay_rule *rule = (struct lnet_delay_rule *)arg; + + spin_lock_bh(&delay_dd.dd_lock); + if (list_empty(&rule->dl_sched_link) && delay_dd.dd_running) { + atomic_inc(&rule->dl_refcount); + list_add_tail(&rule->dl_sched_link, &delay_dd.dd_sched_rules); + wake_up(&delay_dd.dd_waitq); + } + spin_unlock_bh(&delay_dd.dd_lock); +} + +/** + * Add a new delay rule to LNet. + * There is no check for duplicate delay rules; all rules will be checked for + * each incoming message. + */ +int +lnet_delay_rule_add(struct lnet_fault_attr *attr) +{ + struct lnet_delay_rule *rule; + int rc = 0; + + if (attr->u.delay.la_rate & attr->u.delay.la_interval) { + CDEBUG(D_NET, "please provide either delay rate or delay interval, but not both at the same time %d/%d\n", + attr->u.delay.la_rate, attr->u.delay.la_interval); + return -EINVAL; + } + + if (!attr->u.delay.la_latency) { + CDEBUG(D_NET, "delay latency cannot be zero\n"); + return -EINVAL; + } + + if (lnet_fault_attr_validate(attr)) + return -EINVAL; + + CFS_ALLOC_PTR(rule); + if (!rule) + return -ENOMEM; + + mutex_lock(&delay_dd.dd_mutex); + if (!delay_dd.dd_running) { + struct task_struct *task; + + /** + * NB: although LND threads will process delayed messages + * in lnet_finalize, there is no guarantee that LND + * threads will be woken up if no other message needs to + * be handled. + * There is only one daemon thread; performance is not a + * concern of this simulation module. + */ + task = kthread_run(lnet_delay_rule_daemon, NULL, "lnet_dd"); + if (IS_ERR(task)) { + rc = PTR_ERR(task); + goto failed; + } + wait_event(delay_dd.dd_ctl_waitq, delay_dd.dd_running); + } + + init_timer(&rule->dl_timer); + rule->dl_timer.function = delay_timer_cb; + rule->dl_timer.data = (unsigned long)rule; + + spin_lock_init(&rule->dl_lock); + INIT_LIST_HEAD(&rule->dl_msg_list); + INIT_LIST_HEAD(&rule->dl_sched_link); + + rule->dl_attr = *attr; + if (attr->u.delay.la_interval) { + rule->dl_time_base = cfs_time_shift(attr->u.delay.la_interval); + rule->dl_delay_time = cfs_time_shift(cfs_rand() % + attr->u.delay.la_interval); + } else { + rule->dl_delay_at = cfs_rand() % attr->u.delay.la_rate; + } + + rule->dl_msg_send = -1; + + lnet_net_lock(LNET_LOCK_EX); + atomic_set(&rule->dl_refcount, 1); + list_add(&rule->dl_link, &the_lnet.ln_delay_rules); + lnet_net_unlock(LNET_LOCK_EX); + + CDEBUG(D_NET, "Added delay rule: src %s, dst %s, rate %d\n", + libcfs_nid2str(attr->fa_src), libcfs_nid2str(attr->fa_dst), + attr->u.delay.la_rate); + + mutex_unlock(&delay_dd.dd_mutex); + return 0; +failed: + mutex_unlock(&delay_dd.dd_mutex); + CFS_FREE_PTR(rule); + return rc; +} + +/** + * Remove matched delay rules from LNet; if \a shutdown is true or both \a src + * and \a dst are zero, all rules will be removed, otherwise only matched rules + * will be removed. + * If \a src is zero, then all rules that have \a dst as destination will be removed. + * If \a dst is zero, then all rules that have \a src as source will be removed. + * + * When a delay rule is removed, all delayed messages of this rule will be + * processed immediately.
+ */ +int +lnet_delay_rule_del(lnet_nid_t src, lnet_nid_t dst, bool shutdown) +{ + struct lnet_delay_rule *rule; + struct lnet_delay_rule *tmp; + struct list_head rule_list; + struct list_head msg_list; + int n = 0; + bool cleanup; + + INIT_LIST_HEAD(&rule_list); + INIT_LIST_HEAD(&msg_list); + + if (shutdown) { + src = 0; + dst = 0; + } + + mutex_lock(&delay_dd.dd_mutex); + lnet_net_lock(LNET_LOCK_EX); + + list_for_each_entry_safe(rule, tmp, &the_lnet.ln_delay_rules, dl_link) { + if (rule->dl_attr.fa_src != src && src) + continue; + + if (rule->dl_attr.fa_dst != dst && dst) + continue; + + CDEBUG(D_NET, "Remove delay rule: src %s->dst: %s (1/%d, %d)\n", + libcfs_nid2str(rule->dl_attr.fa_src), + libcfs_nid2str(rule->dl_attr.fa_dst), + rule->dl_attr.u.delay.la_rate, + rule->dl_attr.u.delay.la_interval); + /* refcount is taken over by rule_list */ + list_move(&rule->dl_link, &rule_list); + } + + /* check if we need to shutdown delay_daemon */ + cleanup = list_empty(&the_lnet.ln_delay_rules) && + !list_empty(&rule_list); + lnet_net_unlock(LNET_LOCK_EX); + + list_for_each_entry_safe(rule, tmp, &rule_list, dl_link) { + list_del_init(&rule->dl_link); + + del_timer_sync(&rule->dl_timer); + delayed_msg_check(rule, true, &msg_list); + delay_rule_decref(rule); /* -1 for the_lnet.ln_delay_rules */ + n++; + } + + if (cleanup) { /* no more delay rule, shutdown delay_daemon */ + LASSERT(delay_dd.dd_running); + delay_dd.dd_running = 0; + wake_up(&delay_dd.dd_waitq); + + while (!delay_dd.dd_stopped) + wait_event(delay_dd.dd_ctl_waitq, delay_dd.dd_stopped); + } + mutex_unlock(&delay_dd.dd_mutex); + + if (!list_empty(&msg_list)) + delayed_msg_process(&msg_list, shutdown); + + return n; +} + +/** + * List Delay Rule at position of \a pos + */ +int +lnet_delay_rule_list(int pos, struct lnet_fault_attr *attr, + struct lnet_fault_stat *stat) +{ + struct lnet_delay_rule *rule; + int cpt; + int i = 0; + int rc = -ENOENT; + + cpt = lnet_net_lock_current(); + list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) { + if (i++ < pos) + continue; + + spin_lock(&rule->dl_lock); + *attr = rule->dl_attr; + *stat = rule->dl_stat; + spin_unlock(&rule->dl_lock); + rc = 0; + break; + } + + lnet_net_unlock(cpt); + return rc; +} + +/** + * reset counters for all Delay Rules + */ +void +lnet_delay_rule_reset(void) +{ + struct lnet_delay_rule *rule; + int cpt; + + cpt = lnet_net_lock_current(); + + list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) { + struct lnet_fault_attr *attr = &rule->dl_attr; + + spin_lock(&rule->dl_lock); + + memset(&rule->dl_stat, 0, sizeof(rule->dl_stat)); + if (attr->u.delay.la_rate) { + rule->dl_delay_at = cfs_rand() % attr->u.delay.la_rate; + } else { + rule->dl_delay_time = cfs_time_shift(cfs_rand() % + attr->u.delay.la_interval); + rule->dl_time_base = cfs_time_shift(attr->u.delay.la_interval); + } + spin_unlock(&rule->dl_lock); + } + + lnet_net_unlock(cpt); +} + +int +lnet_fault_ctl(int opc, struct libcfs_ioctl_data *data) +{ + struct lnet_fault_attr *attr; + struct lnet_fault_stat *stat; + + attr = (struct lnet_fault_attr *)data->ioc_inlbuf1; + + switch (opc) { + default: + return -EINVAL; + + case LNET_CTL_DROP_ADD: + if (!attr) + return -EINVAL; + + return lnet_drop_rule_add(attr); + + case LNET_CTL_DROP_DEL: + if (!attr) + return -EINVAL; + + data->ioc_count = lnet_drop_rule_del(attr->fa_src, + attr->fa_dst); + return 0; + + case LNET_CTL_DROP_RESET: + lnet_drop_rule_reset(); + return 0; + + case LNET_CTL_DROP_LIST: + stat = (struct lnet_fault_stat *)data->ioc_inlbuf2; + if 
(!attr || !stat) + return -EINVAL; + + return lnet_drop_rule_list(data->ioc_count, attr, stat); + + case LNET_CTL_DELAY_ADD: + if (!attr) + return -EINVAL; + + return lnet_delay_rule_add(attr); + + case LNET_CTL_DELAY_DEL: + if (!attr) + return -EINVAL; + + data->ioc_count = lnet_delay_rule_del(attr->fa_src, + attr->fa_dst, false); + return 0; + + case LNET_CTL_DELAY_RESET: + lnet_delay_rule_reset(); + return 0; + + case LNET_CTL_DELAY_LIST: + stat = (struct lnet_fault_stat *)data->ioc_inlbuf2; + if (!attr || !stat) + return -EINVAL; + + return lnet_delay_rule_list(data->ioc_count, attr, stat); + } +} + +int +lnet_fault_init(void) +{ + CLASSERT(LNET_PUT_BIT == 1 << LNET_MSG_PUT); + CLASSERT(LNET_ACK_BIT == 1 << LNET_MSG_ACK); + CLASSERT(LNET_GET_BIT == 1 << LNET_MSG_GET); + CLASSERT(LNET_REPLY_BIT == 1 << LNET_MSG_REPLY); + + mutex_init(&delay_dd.dd_mutex); + spin_lock_init(&delay_dd.dd_lock); + init_waitqueue_head(&delay_dd.dd_waitq); + init_waitqueue_head(&delay_dd.dd_ctl_waitq); + INIT_LIST_HEAD(&delay_dd.dd_sched_rules); + + return 0; +} + +void +lnet_fault_fini(void) +{ + lnet_drop_rule_del(0, 0); + lnet_delay_rule_del(0, 0, true); + + LASSERT(list_empty(&the_lnet.ln_drop_rules)); + LASSERT(list_empty(&the_lnet.ln_delay_rules)); + LASSERT(list_empty(&delay_dd.dd_sched_rules)); +} diff --git a/drivers/staging/lustre/lnet/lnet/nidstrings.c b/drivers/staging/lustre/lnet/lnet/nidstrings.c index 80f585afa..ebf468fbc 100644 --- a/drivers/staging/lustre/lnet/lnet/nidstrings.c +++ b/drivers/staging/lustre/lnet/lnet/nidstrings.c @@ -170,7 +170,7 @@ parse_addrange(const struct cfs_lstr *src, struct nidrange *nidrange) } LIBCFS_ALLOC(addrrange, sizeof(struct addrrange)); - if (addrrange == NULL) + if (!addrrange) return -ENOMEM; list_add_tail(&addrrange->ar_link, &nidrange->nr_addrranges); INIT_LIST_HEAD(&addrrange->ar_numaddr_ranges); @@ -203,16 +203,18 @@ add_nidrange(const struct cfs_lstr *src, return NULL; nf = libcfs_namenum2netstrfns(src->ls_str); - if (nf == NULL) + if (!nf) return NULL; endlen = src->ls_len - strlen(nf->nf_name); - if (endlen == 0) + if (!endlen) /* network name only, e.g. "elan" or "tcp" */ netnum = 0; else { - /* e.g. "elan25" or "tcp23", refuse to parse if + /* + * e.g. 
"elan25" or "tcp23", refuse to parse if * network name is not appended with decimal or - * hexadecimal number */ + * hexadecimal number + */ if (!cfs_str2num_check(src->ls_str + strlen(nf->nf_name), endlen, &netnum, 0, MAX_NUMERIC_VALUE)) return NULL; @@ -227,7 +229,7 @@ add_nidrange(const struct cfs_lstr *src, } LIBCFS_ALLOC(nr, sizeof(struct nidrange)); - if (nr == NULL) + if (!nr) return NULL; list_add_tail(&nr->nr_link, nidlist); INIT_LIST_HEAD(&nr->nr_addrranges); @@ -253,22 +255,21 @@ parse_nidrange(struct cfs_lstr *src, struct list_head *nidlist) struct nidrange *nr; tmp = *src; - if (cfs_gettok(src, '@', &addrrange) == 0) + if (!cfs_gettok(src, '@', &addrrange)) goto failed; - if (cfs_gettok(src, '@', &net) == 0 || src->ls_str != NULL) + if (!cfs_gettok(src, '@', &net) || src->ls_str) goto failed; nr = add_nidrange(&net, nidlist); - if (nr == NULL) + if (!nr) goto failed; - if (parse_addrange(&addrrange, nr) != 0) + if (parse_addrange(&addrrange, nr)) goto failed; return 1; failed: - CWARN("can't parse nidrange: \"%.*s\"\n", tmp.ls_len, tmp.ls_str); return 0; } @@ -342,12 +343,12 @@ cfs_parse_nidlist(char *str, int len, struct list_head *nidlist) INIT_LIST_HEAD(nidlist); while (src.ls_str) { rc = cfs_gettok(&src, ' ', &res); - if (rc == 0) { + if (!rc) { cfs_free_nidlist(nidlist); return 0; } rc = parse_nidrange(&res, nidlist); - if (rc == 0) { + if (!rc) { cfs_free_nidlist(nidlist); return 0; } @@ -378,7 +379,7 @@ int cfs_match_nid(lnet_nid_t nid, struct list_head *nidlist) return 1; list_for_each_entry(ar, &nr->nr_addrranges, ar_link) if (nr->nr_netstrfns->nf_match_addr(LNET_NIDADDR(nid), - &ar->ar_numaddr_ranges)) + &ar->ar_numaddr_ranges)) return 1; } return 0; @@ -395,7 +396,7 @@ cfs_print_network(char *buffer, int count, struct nidrange *nr) { struct netstrfns *nf = nr->nr_netstrfns; - if (nr->nr_netnum == 0) + if (!nr->nr_netnum) return scnprintf(buffer, count, "@%s", nf->nf_name); else return scnprintf(buffer, count, "@%s%u", @@ -417,7 +418,7 @@ cfs_print_addrranges(char *buffer, int count, struct list_head *addrranges, struct netstrfns *nf = nr->nr_netstrfns; list_for_each_entry(ar, addrranges, ar_link) { - if (i != 0) + if (i) i += scnprintf(buffer + i, count - i, " "); i += nf->nf_print_addrlist(buffer + i, count - i, &ar->ar_numaddr_ranges); @@ -442,10 +443,10 @@ int cfs_print_nidlist(char *buffer, int count, struct list_head *nidlist) return 0; list_for_each_entry(nr, nidlist, nr_link) { - if (i != 0) + if (i) i += scnprintf(buffer + i, count - i, " "); - if (nr->nr_all != 0) { + if (nr->nr_all) { LASSERT(list_empty(&nr->nr_addrranges)); i += scnprintf(buffer + i, count - i, "*"); i += cfs_print_network(buffer + i, count - i, nr); @@ -487,13 +488,13 @@ static void cfs_ip_ar_min_max(struct addrrange *ar, __u32 *min_nid, tmp_ip_addr = ((min_ip[0] << 24) | (min_ip[1] << 16) | (min_ip[2] << 8) | min_ip[3]); - if (min_nid != NULL) + if (min_nid) *min_nid = tmp_ip_addr; tmp_ip_addr = ((max_ip[0] << 24) | (max_ip[1] << 16) | (max_ip[2] << 8) | max_ip[3]); - if (max_nid != NULL) + if (max_nid) *max_nid = tmp_ip_addr; } @@ -515,16 +516,16 @@ static void cfs_num_ar_min_max(struct addrrange *ar, __u32 *min_nid, list_for_each_entry(el, &ar->ar_numaddr_ranges, el_link) { list_for_each_entry(re, &el->el_exprs, re_link) { - if (re->re_lo < min_addr || min_addr == 0) + if (re->re_lo < min_addr || !min_addr) min_addr = re->re_lo; if (re->re_hi > max_addr) max_addr = re->re_hi; } } - if (min_nid != NULL) + if (min_nid) *min_nid = min_addr; - if (max_nid != NULL) + if (max_nid) *max_nid 
= max_addr; } @@ -546,17 +547,17 @@ bool cfs_nidrange_is_contiguous(struct list_head *nidlist) list_for_each_entry(nr, nidlist, nr_link) { nf = nr->nr_netstrfns; - if (lndname == NULL) + if (!lndname) lndname = nf->nf_name; if (netnum == -1) netnum = nr->nr_netnum; - if (strcmp(lndname, nf->nf_name) != 0 || + if (strcmp(lndname, nf->nf_name) || netnum != nr->nr_netnum) return false; } - if (nf == NULL) + if (!nf) return false; if (!nf->nf_is_contiguous(nidlist)) @@ -590,7 +591,7 @@ static bool cfs_num_is_contiguous(struct list_head *nidlist) list_for_each_entry(ar, &nr->nr_addrranges, ar_link) { cfs_num_ar_min_max(ar, ¤t_start_nid, ¤t_end_nid); - if (last_end_nid != 0 && + if (last_end_nid && (current_start_nid - last_end_nid != 1)) return false; last_end_nid = current_end_nid; @@ -600,7 +601,7 @@ static bool cfs_num_is_contiguous(struct list_head *nidlist) re_link) { if (re->re_stride > 1) return false; - else if (last_hi != 0 && + else if (last_hi && re->re_hi - last_hi != 1) return false; last_hi = re->re_hi; @@ -640,7 +641,7 @@ static bool cfs_ip_is_contiguous(struct list_head *nidlist) last_diff = 0; cfs_ip_ar_min_max(ar, ¤t_start_nid, ¤t_end_nid); - if (last_end_nid != 0 && + if (last_end_nid && (current_start_nid - last_end_nid != 1)) return false; last_end_nid = current_end_nid; @@ -724,7 +725,7 @@ static void cfs_num_min_max(struct list_head *nidlist, __u32 *min_nid, list_for_each_entry(ar, &nr->nr_addrranges, ar_link) { cfs_num_ar_min_max(ar, &tmp_min_addr, &tmp_max_addr); - if (tmp_min_addr < min_addr || min_addr == 0) + if (tmp_min_addr < min_addr || !min_addr) min_addr = tmp_min_addr; if (tmp_max_addr > max_addr) max_addr = tmp_min_addr; @@ -756,16 +757,16 @@ static void cfs_ip_min_max(struct list_head *nidlist, __u32 *min_nid, list_for_each_entry(ar, &nr->nr_addrranges, ar_link) { cfs_ip_ar_min_max(ar, &tmp_min_ip_addr, &tmp_max_ip_addr); - if (tmp_min_ip_addr < min_ip_addr || min_ip_addr == 0) + if (tmp_min_ip_addr < min_ip_addr || !min_ip_addr) min_ip_addr = tmp_min_ip_addr; if (tmp_max_ip_addr > max_ip_addr) max_ip_addr = tmp_max_ip_addr; } } - if (min_nid != NULL) + if (min_nid) *min_nid = min_ip_addr; - if (max_nid != NULL) + if (max_nid) *max_nid = max_ip_addr; } @@ -784,12 +785,14 @@ libcfs_ip_addr2str(__u32 addr, char *str, size_t size) (addr >> 8) & 0xff, addr & 0xff); } -/* CAVEAT EMPTOR XscanfX +/* + * CAVEAT EMPTOR XscanfX * I use "%n" at the end of a sscanf format to detect trailing junk. However * sscanf may return immediately if it sees the terminating '0' in a string, so * I initialise the %n variable to the expected length. If sscanf sets it; * fine, if it doesn't, then the scan ended at the end of the string, which is - * fine too :) */ + * fine too :) + */ static int libcfs_ip_str2addr(const char *str, int nob, __u32 *addr) { @@ -802,9 +805,9 @@ libcfs_ip_str2addr(const char *str, int nob, __u32 *addr) /* numeric IP? 
*/ if (sscanf(str, "%u.%u.%u.%u%n", &a, &b, &c, &d, &n) >= 4 && n == nob && - (a & ~0xff) == 0 && (b & ~0xff) == 0 && - (c & ~0xff) == 0 && (d & ~0xff) == 0) { - *addr = ((a<<24)|(b<<16)|(c<<8)|d); + !(a & ~0xff) && !(b & ~0xff) && + !(c & ~0xff) && !(d & ~0xff)) { + *addr = ((a << 24) | (b << 16) | (c << 8) | d); return 1; } @@ -824,7 +827,7 @@ cfs_ip_addr_parse(char *str, int len, struct list_head *list) src.ls_len = len; i = 0; - while (src.ls_str != NULL) { + while (src.ls_str) { struct cfs_lstr res; if (!cfs_gettok(&src, '.', &res)) { @@ -833,7 +836,7 @@ cfs_ip_addr_parse(char *str, int len, struct list_head *list) } rc = cfs_expr_list_parse(res.ls_str, res.ls_len, 0, 255, &el); - if (rc != 0) + if (rc) goto out; list_add_tail(&el->el_link, list); @@ -858,7 +861,7 @@ libcfs_ip_addr_range_print(char *buffer, int count, struct list_head *list) list_for_each_entry(el, list, el_link) { LASSERT(j++ < 4); - if (i != 0) + if (i) i += scnprintf(buffer + i, count - i, "."); i += cfs_expr_list_print(buffer + i, count - i, el); } @@ -928,7 +931,7 @@ libcfs_num_parse(char *str, int len, struct list_head *list) int rc; rc = cfs_expr_list_parse(str, len, 0, MAX_NUMERIC_VALUE, &el); - if (rc == 0) + if (!rc) list_add_tail(&el->el_link, list); return rc; @@ -1060,7 +1063,7 @@ libcfs_name2netstrfns(const char *name) int libcfs_isknown_lnd(__u32 lnd) { - return libcfs_lnd2netstrfns(lnd) != NULL; + return !!libcfs_lnd2netstrfns(lnd); } EXPORT_SYMBOL(libcfs_isknown_lnd); @@ -1069,7 +1072,7 @@ libcfs_lnd2modname(__u32 lnd) { struct netstrfns *nf = libcfs_lnd2netstrfns(lnd); - return (nf == NULL) ? NULL : nf->nf_modname; + return nf ? nf->nf_modname : NULL; } EXPORT_SYMBOL(libcfs_lnd2modname); @@ -1078,10 +1081,10 @@ libcfs_str2lnd(const char *str) { struct netstrfns *nf = libcfs_name2netstrfns(str); - if (nf != NULL) + if (nf) return nf->nf_type; - return -1; + return -ENXIO; } EXPORT_SYMBOL(libcfs_str2lnd); @@ -1091,7 +1094,7 @@ libcfs_lnd2str_r(__u32 lnd, char *buf, size_t buf_size) struct netstrfns *nf; nf = libcfs_lnd2netstrfns(lnd); - if (nf == NULL) + if (!nf) snprintf(buf, buf_size, "?%u?", lnd); else snprintf(buf, buf_size, "%s", nf->nf_name); @@ -1108,9 +1111,9 @@ libcfs_net2str_r(__u32 net, char *buf, size_t buf_size) struct netstrfns *nf; nf = libcfs_lnd2netstrfns(lnd); - if (nf == NULL) + if (!nf) snprintf(buf, buf_size, "<%u:%u>", lnd, nnum); - else if (nnum == 0) + else if (!nnum) snprintf(buf, buf_size, "%s", nf->nf_name); else snprintf(buf, buf_size, "%s%u", nf->nf_name, nnum); @@ -1135,14 +1138,14 @@ libcfs_nid2str_r(lnet_nid_t nid, char *buf, size_t buf_size) } nf = libcfs_lnd2netstrfns(lnd); - if (nf == NULL) + if (!nf) { snprintf(buf, buf_size, "%x@<%u:%u>", addr, lnd, nnum); - else { + } else { size_t addr_len; nf->nf_addr2str(addr, buf, buf_size); addr_len = strlen(buf); - if (nnum == 0) + if (!nnum) snprintf(buf + addr_len, buf_size - addr_len, "@%s", nf->nf_name); else @@ -1195,7 +1198,7 @@ libcfs_str2net(const char *str) { __u32 net; - if (libcfs_str2net_internal(str, &net) != NULL) + if (libcfs_str2net_internal(str, &net)) return net; return LNET_NIDNET(LNET_NID_ANY); @@ -1210,15 +1213,15 @@ libcfs_str2nid(const char *str) __u32 net; __u32 addr; - if (sep != NULL) { + if (sep) { nf = libcfs_str2net_internal(sep + 1, &net); - if (nf == NULL) + if (!nf) return LNET_NID_ANY; } else { sep = str + strlen(str); net = LNET_MKNET(SOCKLND, 0); nf = libcfs_lnd2netstrfns(SOCKLND); - LASSERT(nf != NULL); + LASSERT(nf); } if (!nf->nf_str2addr(str, (int)(sep - str), &addr)) @@ -1240,8 +1243,8 
@@ libcfs_id2str(lnet_process_id_t id) } snprintf(str, LNET_NIDSTR_SIZE, "%s%u-%s", - ((id.pid & LNET_PID_USERFLAG) != 0) ? "U" : "", - (id.pid & ~LNET_PID_USERFLAG), libcfs_nid2str(id.nid)); + id.pid & LNET_PID_USERFLAG ? "U" : "", + id.pid & ~LNET_PID_USERFLAG, libcfs_nid2str(id.nid)); return str; } EXPORT_SYMBOL(libcfs_id2str); diff --git a/drivers/staging/lustre/lnet/lnet/peer.c b/drivers/staging/lustre/lnet/lnet/peer.c index 1fceed3c8..b026feebc 100644 --- a/drivers/staging/lustre/lnet/lnet/peer.c +++ b/drivers/staging/lustre/lnet/lnet/peer.c @@ -39,6 +39,7 @@ #define DEBUG_SUBSYSTEM S_LNET #include "../../include/linux/lnet/lib-lnet.h" +#include "../../include/linux/lnet/lib-dlc.h" int lnet_peer_tables_create(void) @@ -50,7 +51,7 @@ lnet_peer_tables_create(void) the_lnet.ln_peer_tables = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*ptable)); - if (the_lnet.ln_peer_tables == NULL) { + if (!the_lnet.ln_peer_tables) { CERROR("Failed to allocate cpu-partition peer tables\n"); return -ENOMEM; } @@ -60,7 +61,7 @@ lnet_peer_tables_create(void) LIBCFS_CPT_ALLOC(hash, lnet_cpt_table(), i, LNET_PEER_HASH_SIZE * sizeof(*hash)); - if (hash == NULL) { + if (!hash) { CERROR("Failed to create peer hash table\n"); lnet_peer_tables_destroy(); return -ENOMEM; @@ -82,12 +83,12 @@ lnet_peer_tables_destroy(void) int i; int j; - if (the_lnet.ln_peer_tables == NULL) + if (!the_lnet.ln_peer_tables) return; cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { hash = ptable->pt_hash; - if (hash == NULL) /* not initialized */ + if (!hash) /* not initialized */ break; LASSERT(list_empty(&ptable->pt_deathrow)); @@ -103,62 +104,116 @@ lnet_peer_tables_destroy(void) the_lnet.ln_peer_tables = NULL; } +static void +lnet_peer_table_cleanup_locked(lnet_ni_t *ni, struct lnet_peer_table *ptable) +{ + int i; + lnet_peer_t *lp; + lnet_peer_t *tmp; + + for (i = 0; i < LNET_PEER_HASH_SIZE; i++) { + list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i], + lp_hashlist) { + if (ni && ni != lp->lp_ni) + continue; + list_del_init(&lp->lp_hashlist); + /* Lose hash table's ref */ + ptable->pt_zombies++; + lnet_peer_decref_locked(lp); + } + } +} + +static void +lnet_peer_table_deathrow_wait_locked(struct lnet_peer_table *ptable, + int cpt_locked) +{ + int i; + + for (i = 3; ptable->pt_zombies; i++) { + lnet_net_unlock(cpt_locked); + + if (is_power_of_2(i)) { + CDEBUG(D_WARNING, + "Waiting for %d zombies on peer table\n", + ptable->pt_zombies); + } + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(cfs_time_seconds(1) >> 1); + lnet_net_lock(cpt_locked); + } +} + +static void +lnet_peer_table_del_rtrs_locked(lnet_ni_t *ni, struct lnet_peer_table *ptable, + int cpt_locked) +{ + lnet_peer_t *lp; + lnet_peer_t *tmp; + lnet_nid_t lp_nid; + int i; + + for (i = 0; i < LNET_PEER_HASH_SIZE; i++) { + list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i], + lp_hashlist) { + if (ni != lp->lp_ni) + continue; + + if (!lp->lp_rtr_refcount) + continue; + + lp_nid = lp->lp_nid; + + lnet_net_unlock(cpt_locked); + lnet_del_route(LNET_NIDNET(LNET_NID_ANY), lp_nid); + lnet_net_lock(cpt_locked); + } + } +} + void -lnet_peer_tables_cleanup(void) +lnet_peer_tables_cleanup(lnet_ni_t *ni) { struct lnet_peer_table *ptable; + struct list_head deathrow; + lnet_peer_t *lp; + lnet_peer_t *temp; int i; - int j; - LASSERT(the_lnet.ln_shutdown); /* i.e. no new peers */ + INIT_LIST_HEAD(&deathrow); + LASSERT(the_lnet.ln_shutdown || ni); + /* + * If just deleting the peers for a NI, get rid of any routes these + * peers are gateways for. 
+ */ cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { lnet_net_lock(i); - - for (j = 0; j < LNET_PEER_HASH_SIZE; j++) { - struct list_head *peers = &ptable->pt_hash[j]; - - while (!list_empty(peers)) { - lnet_peer_t *lp = list_entry(peers->next, - lnet_peer_t, - lp_hashlist); - list_del_init(&lp->lp_hashlist); - /* lose hash table's ref */ - lnet_peer_decref_locked(lp); - } - } - + lnet_peer_table_del_rtrs_locked(ni, ptable, i); lnet_net_unlock(i); } + /* + * Start the process of moving the applicable peers to + * deathrow. + */ cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { - LIST_HEAD(deathrow); - lnet_peer_t *lp; - lnet_net_lock(i); + lnet_peer_table_cleanup_locked(ni, ptable); + lnet_net_unlock(i); + } - for (j = 3; ptable->pt_number != 0; j++) { - lnet_net_unlock(i); - - if ((j & (j - 1)) == 0) { - CDEBUG(D_WARNING, - "Waiting for %d peers on peer table\n", - ptable->pt_number); - } - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1) / 2); - lnet_net_lock(i); - } + /* Cleanup all entries on deathrow. */ + cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { + lnet_net_lock(i); + lnet_peer_table_deathrow_wait_locked(ptable, i); list_splice_init(&ptable->pt_deathrow, &deathrow); - lnet_net_unlock(i); + } - while (!list_empty(&deathrow)) { - lp = list_entry(deathrow.next, - lnet_peer_t, lp_hashlist); - list_del(&lp->lp_hashlist); - LIBCFS_FREE(lp, sizeof(*lp)); - } + list_for_each_entry_safe(lp, temp, &deathrow, lp_hashlist) { + list_del(&lp->lp_hashlist); + LIBCFS_FREE(lp, sizeof(*lp)); } } @@ -167,11 +222,11 @@ lnet_destroy_peer_locked(lnet_peer_t *lp) { struct lnet_peer_table *ptable; - LASSERT(lp->lp_refcount == 0); - LASSERT(lp->lp_rtr_refcount == 0); + LASSERT(!lp->lp_refcount); + LASSERT(!lp->lp_rtr_refcount); LASSERT(list_empty(&lp->lp_txq)); LASSERT(list_empty(&lp->lp_hashlist)); - LASSERT(lp->lp_txqnob == 0); + LASSERT(!lp->lp_txqnob); ptable = the_lnet.ln_peer_tables[lp->lp_cpt]; LASSERT(ptable->pt_number > 0); @@ -181,6 +236,8 @@ lnet_destroy_peer_locked(lnet_peer_t *lp) lp->lp_ni = NULL; list_add(&lp->lp_hashlist, &ptable->pt_deathrow); + LASSERT(ptable->pt_zombies > 0); + ptable->pt_zombies--; } lnet_peer_t * @@ -220,14 +277,14 @@ lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt) ptable = the_lnet.ln_peer_tables[cpt2]; lp = lnet_find_peer_locked(ptable, nid); - if (lp != NULL) { + if (lp) { *lpp = lp; return 0; } if (!list_empty(&ptable->pt_deathrow)) { lp = list_entry(ptable->pt_deathrow.next, - lnet_peer_t, lp_hashlist); + lnet_peer_t, lp_hashlist); list_del(&lp->lp_hashlist); } @@ -238,12 +295,12 @@ lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt) ptable->pt_number++; lnet_net_unlock(cpt); - if (lp != NULL) + if (lp) memset(lp, 0, sizeof(*lp)); else LIBCFS_CPT_ALLOC(lp, lnet_cpt_table(), cpt2, sizeof(*lp)); - if (lp == NULL) { + if (!lp) { rc = -ENOMEM; lnet_net_lock(cpt); goto out; @@ -276,30 +333,30 @@ lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt) } lp2 = lnet_find_peer_locked(ptable, nid); - if (lp2 != NULL) { + if (lp2) { *lpp = lp2; goto out; } lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid), cpt2); - if (lp->lp_ni == NULL) { + if (!lp->lp_ni) { rc = -EHOSTUNREACH; goto out; } - lp->lp_txcredits = - lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits; - lp->lp_rtrcredits = + lp->lp_txcredits = lp->lp_ni->ni_peertxcredits; + lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits; + lp->lp_rtrcredits = lnet_peer_buffer_credits(lp->lp_ni); lp->lp_minrtrcredits = 
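/*
 * Aside: a minimal sketch of the allocate-or-recycle step that
 * lnet_nid2peer_locked() performs above. A peer parked on the table's
 * deathrow is pulled back and zeroed before any fresh allocation is
 * attempted, so peer churn does not hammer the allocator. The types and
 * helper below are illustrative assumptions, not LNet APIs, and the
 * list handling is simplified to a singly-linked free list.
 */
#include <stdlib.h>
#include <string.h>

struct peer {
	struct peer *next;
	/* ... per-peer state ... */
};

struct peer_table {
	struct peer *deathrow;	/* dead peers kept around for reuse */
	int number;		/* peers accounted to this table */
};

static struct peer *peer_get(struct peer_table *pt)
{
	struct peer *lp = pt->deathrow;

	if (lp) {
		pt->deathrow = lp->next;	/* recycle a dead peer */
		memset(lp, 0, sizeof(*lp));
	} else {
		lp = calloc(1, sizeof(*lp));	/* may fail: -ENOMEM path */
	}

	if (lp)
		pt->number++;
	return lp;
}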
lnet_peer_buffer_credits(lp->lp_ni); list_add_tail(&lp->lp_hashlist, - &ptable->pt_hash[lnet_nid2peerhash(nid)]); + &ptable->pt_hash[lnet_nid2peerhash(nid)]); ptable->pt_version++; *lpp = lp; return 0; out: - if (lp != NULL) + if (lp) list_add(&lp->lp_hashlist, &ptable->pt_deathrow); ptable->pt_number--; return rc; @@ -317,7 +374,7 @@ lnet_debug_peer(lnet_nid_t nid) lnet_net_lock(cpt); rc = lnet_nid2peer_locked(&lp, nid, cpt); - if (rc != 0) { + if (rc) { lnet_net_unlock(cpt); CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid)); return; @@ -336,3 +393,65 @@ lnet_debug_peer(lnet_nid_t nid) lnet_net_unlock(cpt); } + +int +lnet_get_peer_info(__u32 peer_index, __u64 *nid, + char aliveness[LNET_MAX_STR_LEN], + __u32 *cpt_iter, __u32 *refcount, + __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits, + __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credits, + __u32 *peer_tx_qnob) +{ + struct lnet_peer_table *peer_table; + lnet_peer_t *lp; + bool found = false; + int lncpt, j; + + /* get the number of CPTs */ + lncpt = cfs_percpt_number(the_lnet.ln_peer_tables); + + /* + * if the cpt number to be examined is >= the number of cpts in + * the system then indicate that there are no more cpts to examin + */ + if (*cpt_iter >= lncpt) + return -ENOENT; + + /* get the current table */ + peer_table = the_lnet.ln_peer_tables[*cpt_iter]; + /* if the ptable is NULL then there are no more cpts to examine */ + if (!peer_table) + return -ENOENT; + + lnet_net_lock(*cpt_iter); + + for (j = 0; j < LNET_PEER_HASH_SIZE && !found; j++) { + struct list_head *peers = &peer_table->pt_hash[j]; + + list_for_each_entry(lp, peers, lp_hashlist) { + if (peer_index-- > 0) + continue; + + snprintf(aliveness, LNET_MAX_STR_LEN, "NA"); + if (lnet_isrouter(lp) || + lnet_peer_aliveness_enabled(lp)) + snprintf(aliveness, LNET_MAX_STR_LEN, + lp->lp_alive ? "up" : "down"); + + *nid = lp->lp_nid; + *refcount = lp->lp_refcount; + *ni_peer_tx_credits = lp->lp_ni->ni_peertxcredits; + *peer_tx_credits = lp->lp_txcredits; + *peer_rtr_credits = lp->lp_rtrcredits; + *peer_min_rtr_credits = lp->lp_mintxcredits; + *peer_tx_qnob = lp->lp_txqnob; + + found = true; + } + } + lnet_net_unlock(*cpt_iter); + + *cpt_iter = lncpt; + + return found ? 0 : -ENOENT; +} diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c index f5faa414d..b01dc424c 100644 --- a/drivers/staging/lustre/lnet/lnet/router.c +++ b/drivers/staging/lustre/lnet/lnet/router.c @@ -15,10 +15,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * */ #define DEBUG_SUBSYSTEM S_LNET @@ -28,8 +24,11 @@ #define LNET_NRB_TINY (LNET_NRB_TINY_MIN * 4) #define LNET_NRB_SMALL_MIN 4096 /* min value for each CPT */ #define LNET_NRB_SMALL (LNET_NRB_SMALL_MIN * 4) +#define LNET_NRB_SMALL_PAGES 1 #define LNET_NRB_LARGE_MIN 256 /* min value for each CPT */ #define LNET_NRB_LARGE (LNET_NRB_LARGE_MIN * 4) +#define LNET_NRB_LARGE_PAGES ((LNET_MTU + PAGE_SIZE - 1) >> \ + PAGE_SHIFT) static char *forwarding = ""; module_param(forwarding, charp, 0444); @@ -61,8 +60,10 @@ lnet_peer_buffer_credits(lnet_ni_t *ni) if (peer_buffer_credits > 0) return peer_buffer_credits; - /* As an approximation, allow this peer the same number of router - * buffers as it is allowed outstanding sends */ + /* + * As an approximation, allow this peer the same number of router + * buffers as it is allowed outstanding sends + */ return ni->ni_peertxcredits; } @@ -107,7 +108,7 @@ lnet_notify_locked(lnet_peer_t *lp, int notifylnd, int alive, lp->lp_timestamp = when; /* update timestamp */ lp->lp_ping_deadline = 0; /* disable ping timeout */ - if (lp->lp_alive_count != 0 && /* got old news */ + if (lp->lp_alive_count && /* got old news */ (!lp->lp_alive) == (!alive)) { /* new date for old news */ CDEBUG(D_NET, "Old news\n"); return; @@ -131,11 +132,12 @@ lnet_ni_notify_locked(lnet_ni_t *ni, lnet_peer_t *lp) int alive; int notifylnd; - /* Notify only in 1 thread at any time to ensure ordered notification. + /* + * Notify only in 1 thread at any time to ensure ordered notification. * NB individual events can be missed; the only guarantee is that you - * always get the most recent news */ - - if (lp->lp_notifying || ni == NULL) + * always get the most recent news + */ + if (lp->lp_notifying || !ni) return; lp->lp_notifying = 1; @@ -147,13 +149,14 @@ lnet_ni_notify_locked(lnet_ni_t *ni, lnet_peer_t *lp) lp->lp_notifylnd = 0; lp->lp_notify = 0; - if (notifylnd && ni->ni_lnd->lnd_notify != NULL) { + if (notifylnd && ni->ni_lnd->lnd_notify) { lnet_net_unlock(lp->lp_cpt); - /* A new notification could happen now; I'll handle it - * when control returns to me */ - - (ni->ni_lnd->lnd_notify)(ni, lp->lp_nid, alive); + /* + * A new notification could happen now; I'll handle it + * when control returns to me + */ + ni->ni_lnd->lnd_notify(ni, lp->lp_nid, alive); lnet_net_lock(lp->lp_cpt); } @@ -176,7 +179,7 @@ lnet_rtr_addref_locked(lnet_peer_t *lp) /* a simple insertion sort */ list_for_each_prev(pos, &the_lnet.ln_routers) { lnet_peer_t *rtr = list_entry(pos, lnet_peer_t, - lp_rtr_list); + lp_rtr_list); if (rtr->lp_nid < lp->lp_nid) break; @@ -197,12 +200,12 @@ lnet_rtr_decref_locked(lnet_peer_t *lp) /* lnet_net_lock must be exclusively locked */ lp->lp_rtr_refcount--; - if (lp->lp_rtr_refcount == 0) { + if (!lp->lp_rtr_refcount) { LASSERT(list_empty(&lp->lp_routes)); - if (lp->lp_rcd != NULL) { + if (lp->lp_rcd) { list_add(&lp->lp_rcd->rcd_list, - &the_lnet.ln_rcd_deathrow); + &the_lnet.ln_rcd_deathrow); lp->lp_rcd = NULL; } @@ -245,8 +248,10 @@ static void lnet_shuffle_seed(void) cfs_get_random_bytes(seed, sizeof(seed)); - /* Nodes with small feet have little entropy - * the NID for this node gives the most entropy in the low bits */ + /* + * Nodes with small feet have little entropy + * the NID for this node gives the most entropy in the low bits + */ list_for_each(tmp, &the_lnet.ln_nis) { ni = list_entry(tmp, lnet_ni_t, ni_list); lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid)); @@ -277,7 +282,7 @@ lnet_add_route_to_rnet(lnet_remotenet_t *rnet, lnet_route_t *route) /* len+1 positions to 
add a new entry, also prevents division by 0 */ offset = cfs_rand() % (len + 1); list_for_each(e, &rnet->lrn_routes) { - if (offset == 0) + if (!offset) break; offset--; } @@ -289,7 +294,7 @@ lnet_add_route_to_rnet(lnet_remotenet_t *rnet, lnet_route_t *route) } int -lnet_add_route(__u32 net, unsigned int hops, lnet_nid_t gateway, +lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway, unsigned int priority) { struct list_head *e; @@ -300,7 +305,7 @@ lnet_add_route(__u32 net, unsigned int hops, lnet_nid_t gateway, int add_route; int rc; - CDEBUG(D_NET, "Add route: net %s hops %u priority %u gw %s\n", + CDEBUG(D_NET, "Add route: net %s hops %d priority %u gw %s\n", libcfs_net2str(net), hops, priority, libcfs_nid2str(gateway)); if (gateway == LNET_NID_ANY || @@ -308,21 +313,21 @@ lnet_add_route(__u32 net, unsigned int hops, lnet_nid_t gateway, net == LNET_NIDNET(LNET_NID_ANY) || LNET_NETTYP(net) == LOLND || LNET_NIDNET(gateway) == net || - hops < 1 || hops > 255) + (hops != LNET_UNDEFINED_HOPS && (hops < 1 || hops > 255))) return -EINVAL; if (lnet_islocalnet(net)) /* it's a local network */ - return 0; /* ignore the route entry */ + return -EEXIST; /* Assume net, route, all new */ LIBCFS_ALLOC(route, sizeof(*route)); LIBCFS_ALLOC(rnet, sizeof(*rnet)); - if (route == NULL || rnet == NULL) { + if (!route || !rnet) { CERROR("Out of memory creating route %s %d %s\n", libcfs_net2str(net), hops, libcfs_nid2str(gateway)); - if (route != NULL) + if (route) LIBCFS_FREE(route, sizeof(*route)); - if (rnet != NULL) + if (rnet) LIBCFS_FREE(rnet, sizeof(*rnet)); return -ENOMEM; } @@ -336,25 +341,24 @@ lnet_add_route(__u32 net, unsigned int hops, lnet_nid_t gateway, lnet_net_lock(LNET_LOCK_EX); rc = lnet_nid2peer_locked(&route->lr_gateway, gateway, LNET_LOCK_EX); - if (rc != 0) { + if (rc) { lnet_net_unlock(LNET_LOCK_EX); LIBCFS_FREE(route, sizeof(*route)); LIBCFS_FREE(rnet, sizeof(*rnet)); if (rc == -EHOSTUNREACH) /* gateway is not on a local net */ - return 0; /* ignore the route entry */ + return rc; /* ignore the route entry */ CERROR("Error %d creating route %s %d %s\n", rc, libcfs_net2str(net), hops, libcfs_nid2str(gateway)); - return rc; } LASSERT(!the_lnet.ln_shutdown); rnet2 = lnet_find_net_locked(net); - if (rnet2 == NULL) { + if (!rnet2) { /* new network */ list_add_tail(&rnet->lrn_list, lnet_net2rnethash(net)); rnet2 = rnet; @@ -382,8 +386,8 @@ lnet_add_route(__u32 net, unsigned int hops, lnet_nid_t gateway, lnet_net_unlock(LNET_LOCK_EX); /* XXX Assume alive */ - if (ni->ni_lnd->lnd_notify != NULL) - (ni->ni_lnd->lnd_notify)(ni, gateway, 1); + if (ni->ni_lnd->lnd_notify) + ni->ni_lnd->lnd_notify(ni, gateway, 1); lnet_net_lock(LNET_LOCK_EX); } @@ -391,14 +395,20 @@ lnet_add_route(__u32 net, unsigned int hops, lnet_nid_t gateway, /* -1 for notify or !add_route */ lnet_peer_decref_locked(route->lr_gateway); lnet_net_unlock(LNET_LOCK_EX); + rc = 0; - if (!add_route) + if (!add_route) { + rc = -EEXIST; LIBCFS_FREE(route, sizeof(*route)); + } if (rnet != rnet2) LIBCFS_FREE(rnet, sizeof(*rnet)); - return 0; + /* indicate to startup the router checker if configured */ + wake_up(&the_lnet.ln_rc_waitq); + + return rc; } int @@ -426,10 +436,9 @@ lnet_check_routes(void) lnet_nid_t nid2; int net; - route = list_entry(e2, lnet_route_t, - lr_list); + route = list_entry(e2, lnet_route_t, lr_list); - if (route2 == NULL) { + if (!route2) { route2 = route; continue; } @@ -472,9 +481,10 @@ lnet_del_route(__u32 net, lnet_nid_t gw_nid) CDEBUG(D_NET, "Del route: net %s : gw %s\n", libcfs_net2str(net), 
libcfs_nid2str(gw_nid)); - /* NB Caller may specify either all routes via the given gateway - * or a specific route entry actual NIDs) */ - + /* + * NB Caller may specify either all routes via the given gateway + * or a specific route entry actual NIDs) + */ lnet_net_lock(LNET_LOCK_EX); if (net == LNET_NIDNET(LNET_NID_ANY)) rn_list = &the_lnet.ln_remote_nets_hash[0]; @@ -486,7 +496,7 @@ lnet_del_route(__u32 net, lnet_nid_t gw_nid) rnet = list_entry(e1, lnet_remotenet_t, lrn_list); if (!(net == LNET_NIDNET(LNET_NID_ANY) || - net == rnet->lrn_net)) + net == rnet->lrn_net)) continue; list_for_each(e2, &rnet->lrn_routes) { @@ -513,7 +523,7 @@ lnet_del_route(__u32 net, lnet_nid_t gw_nid) LIBCFS_FREE(route, sizeof(*route)); - if (rnet != NULL) + if (rnet) LIBCFS_FREE(rnet, sizeof(*rnet)); rc = 0; @@ -538,6 +548,38 @@ lnet_destroy_routes(void) lnet_del_route(LNET_NIDNET(LNET_NID_ANY), LNET_NID_ANY); } +int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg) +{ + int i, rc = -ENOENT, j; + + if (!the_lnet.ln_rtrpools) + return rc; + + for (i = 0; i < LNET_NRBPOOLS; i++) { + lnet_rtrbufpool_t *rbp; + + lnet_net_lock(LNET_LOCK_EX); + cfs_percpt_for_each(rbp, j, the_lnet.ln_rtrpools) { + if (i++ != idx) + continue; + + pool_cfg->pl_pools[i].pl_npages = rbp[i].rbp_npages; + pool_cfg->pl_pools[i].pl_nbuffers = rbp[i].rbp_nbuffers; + pool_cfg->pl_pools[i].pl_credits = rbp[i].rbp_credits; + pool_cfg->pl_pools[i].pl_mincredits = rbp[i].rbp_mincredits; + rc = 0; + break; + } + lnet_net_unlock(LNET_LOCK_EX); + } + + lnet_net_lock(LNET_LOCK_EX); + pool_cfg->pl_routing = the_lnet.ln_routing; + lnet_net_unlock(LNET_LOCK_EX); + + return rc; +} + int lnet_get_route(int idx, __u32 *net, __u32 *hops, lnet_nid_t *gateway, __u32 *alive, __u32 *priority) @@ -558,15 +600,14 @@ lnet_get_route(int idx, __u32 *net, __u32 *hops, rnet = list_entry(e1, lnet_remotenet_t, lrn_list); list_for_each(e2, &rnet->lrn_routes) { - route = list_entry(e2, lnet_route_t, - lr_list); + route = list_entry(e2, lnet_route_t, lr_list); - if (idx-- == 0) { + if (!idx--) { *net = rnet->lrn_net; *hops = route->lr_hops; *priority = route->lr_priority; *gateway = route->lr_gateway->lp_nid; - *alive = route->lr_gateway->lp_alive; + *alive = lnet_is_route_alive(route); lnet_net_unlock(cpt); return 0; } @@ -604,7 +645,7 @@ lnet_parse_rc_info(lnet_rc_data_t *rcd) { lnet_ping_info_t *info = rcd->rcd_pinginfo; struct lnet_peer *gw = rcd->rcd_gateway; - lnet_route_t *rtr; + lnet_route_t *rte; if (!gw->lp_alive) return; @@ -621,21 +662,25 @@ lnet_parse_rc_info(lnet_rc_data_t *rcd) } gw->lp_ping_feats = info->pi_features; - if ((gw->lp_ping_feats & LNET_PING_FEAT_MASK) == 0) { + if (!(gw->lp_ping_feats & LNET_PING_FEAT_MASK)) { CDEBUG(D_NET, "%s: Unexpected features 0x%x\n", libcfs_nid2str(gw->lp_nid), gw->lp_ping_feats); return; /* nothing I can understand */ } - if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS) == 0) + if (!(gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS)) return; /* can't carry NI status info */ - list_for_each_entry(rtr, &gw->lp_routes, lr_gwlist) { - int ptl_status = LNET_NI_STATUS_INVALID; + list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) { int down = 0; int up = 0; int i; + if (gw->lp_ping_feats & LNET_PING_FEAT_RTE_DISABLED) { + rte->lr_downis = 1; + continue; + } + for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) { lnet_ni_status_t *stat = &info->pi_ni[i]; lnet_nid_t nid = stat->ns_nid; @@ -651,22 +696,15 @@ lnet_parse_rc_info(lnet_rc_data_t *rcd) continue; if (stat->ns_status == LNET_NI_STATUS_DOWN) 
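/*
 * Aside: the per-route accounting done by lnet_parse_rc_info() in this
 * hunk condenses to the helper sketched below. An NI reported up on the
 * route's target network clears the count outright; otherwise down NIs
 * accumulate; and a single-hop route with nothing down but also no NI
 * on the target network counts as one down interface, since the target
 * network is then unreachable. The struct layout and names here are
 * assumptions for illustration, not LNet types.
 */
#include <stdint.h>

struct ni_stat {		/* one NI entry from a router's ping reply */
	uint32_t net;		/* network this NI sits on */
	int up;			/* nonzero if reported LNET_NI_STATUS_UP */
};

static int route_downis(const struct ni_stat *stats, int nstats,
			uint32_t route_net, unsigned int hops)
{
	int down = 0;
	int i;

	for (i = 0; i < nstats; i++) {
		if (stats[i].up) {
			/* an up NI on the destination net wins outright */
			if (stats[i].net == route_net)
				return 0;
			continue;
		}
		down++;
	}

	/* nothing down, nothing up on the target net, single hop: one */
	if (!down && hops == 1)
		down = 1;

	return down;
}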
{ - if (LNET_NETTYP(LNET_NIDNET(nid)) != PTLLND) - down++; - else if (ptl_status != LNET_NI_STATUS_UP) - ptl_status = LNET_NI_STATUS_DOWN; + down++; continue; } if (stat->ns_status == LNET_NI_STATUS_UP) { - if (LNET_NIDNET(nid) == rtr->lr_net) { + if (LNET_NIDNET(nid) == rte->lr_net) { up = 1; break; } - /* ptl NIs are considered down only when - * they're all down */ - if (LNET_NETTYP(LNET_NIDNET(nid)) == PTLLND) - ptl_status = LNET_NI_STATUS_UP; continue; } @@ -677,10 +715,17 @@ lnet_parse_rc_info(lnet_rc_data_t *rcd) } if (up) { /* ignore downed NIs if NI for dest network is up */ - rtr->lr_downis = 0; + rte->lr_downis = 0; continue; } - rtr->lr_downis = down + (ptl_status == LNET_NI_STATUS_DOWN); + /** + * if @down is zero and this route is single-hop, it means + * we can't find NI for target network + */ + if (!down && rte->lr_hops == 1) + down = 1; + + rte->lr_downis = down; } } @@ -690,7 +735,7 @@ lnet_router_checker_event(lnet_event_t *event) lnet_rc_data_t *rcd = event->md.user_ptr; struct lnet_peer *lp; - LASSERT(rcd != NULL); + LASSERT(rcd); if (event->unlinked) { LNetInvalidateHandle(&rcd->rcd_mdh); @@ -701,11 +746,13 @@ lnet_router_checker_event(lnet_event_t *event) event->type == LNET_EVENT_REPLY); lp = rcd->rcd_gateway; - LASSERT(lp != NULL); + LASSERT(lp); - /* NB: it's called with holding lnet_res_lock, we have a few - * places need to hold both locks at the same time, please take - * care of lock ordering */ + /* + * NB: it's called with holding lnet_res_lock, we have a few + * places need to hold both locks at the same time, please take + * care of lock ordering + */ lnet_net_lock(lp->lp_cpt); if (!lnet_isrouter(lp) || lp->lp_rcd != rcd) { /* ignore if no longer a router or rcd is replaced */ @@ -714,23 +761,26 @@ lnet_router_checker_event(lnet_event_t *event) if (event->type == LNET_EVENT_SEND) { lp->lp_ping_notsent = 0; - if (event->status == 0) + if (!event->status) goto out; } /* LNET_EVENT_REPLY */ - /* A successful REPLY means the router is up. If _any_ comms + /* + * A successful REPLY means the router is up. If _any_ comms * to the router fail I assume it's down (this will happen if * we ping alive routers to try to detect router death before - * apps get burned). */ + * apps get burned). + */ + lnet_notify_locked(lp, 1, !event->status, cfs_time_current()); - lnet_notify_locked(lp, 1, (event->status == 0), cfs_time_current()); - /* The router checker will wake up very shortly and do the + /* + * The router checker will wake up very shortly and do the * actual notification. * XXX If 'lp' stops being a router before then, it will still - * have the notification pending!!! */ - - if (avoid_asym_router_failure && event->status == 0) + * have the notification pending!!! 
+ */ + if (avoid_asym_router_failure && !event->status) lnet_parse_rc_info(rcd); out: @@ -753,7 +803,7 @@ lnet_wait_known_routerstate(void) list_for_each(entry, &the_lnet.ln_routers) { rtr = list_entry(entry, lnet_peer_t, lp_rtr_list); - if (rtr->lp_alive_count == 0) { + if (!rtr->lp_alive_count) { all_known = 0; break; } @@ -774,7 +824,7 @@ lnet_router_ni_update_locked(lnet_peer_t *gw, __u32 net) { lnet_route_t *rte; - if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS) != 0) { + if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS)) { list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) { if (rte->lr_net == net) { rte->lr_downis = 0; @@ -811,13 +861,15 @@ lnet_update_ni_status_locked(void) continue; } - LASSERT(ni->ni_status != NULL); + LASSERT(ni->ni_status); if (ni->ni_status->ns_status != LNET_NI_STATUS_DOWN) { CDEBUG(D_NET, "NI(%s:%d) status changed to down\n", libcfs_nid2str(ni->ni_nid), timeout); - /* NB: so far, this is the only place to set - * NI status to "down" */ + /* + * NB: so far, this is the only place to set + * NI status to "down" + */ ni->ni_status->ns_status = LNET_NI_STATUS_DOWN; } lnet_ni_unlock(ni); @@ -831,7 +883,7 @@ lnet_destroy_rc_data(lnet_rc_data_t *rcd) /* detached from network */ LASSERT(LNetHandleIsInvalid(rcd->rcd_mdh)); - if (rcd->rcd_gateway != NULL) { + if (rcd->rcd_gateway) { int cpt = rcd->rcd_gateway->lp_cpt; lnet_net_lock(cpt); @@ -839,7 +891,7 @@ lnet_destroy_rc_data(lnet_rc_data_t *rcd) lnet_net_unlock(cpt); } - if (rcd->rcd_pinginfo != NULL) + if (rcd->rcd_pinginfo) LIBCFS_FREE(rcd->rcd_pinginfo, LNET_PINGINFO_SIZE); LIBCFS_FREE(rcd, sizeof(*rcd)); @@ -856,14 +908,14 @@ lnet_create_rc_data_locked(lnet_peer_t *gateway) lnet_net_unlock(gateway->lp_cpt); LIBCFS_ALLOC(rcd, sizeof(*rcd)); - if (rcd == NULL) + if (!rcd) goto out; LNetInvalidateHandle(&rcd->rcd_mdh); INIT_LIST_HEAD(&rcd->rcd_list); LIBCFS_ALLOC(pi, LNET_PINGINFO_SIZE); - if (pi == NULL) + if (!pi) goto out; for (i = 0; i < LNET_MAX_RTR_NIS; i++) { @@ -885,11 +937,11 @@ lnet_create_rc_data_locked(lnet_peer_t *gateway) CERROR("Can't bind MD: %d\n", rc); goto out; } - LASSERT(rc == 0); + LASSERT(!rc); lnet_net_lock(gateway->lp_cpt); /* router table changed or someone has created rcd for this gateway */ - if (!lnet_isrouter(gateway) || gateway->lp_rcd != NULL) { + if (!lnet_isrouter(gateway) || gateway->lp_rcd) { lnet_net_unlock(gateway->lp_cpt); goto out; } @@ -902,10 +954,10 @@ lnet_create_rc_data_locked(lnet_peer_t *gateway) return rcd; out: - if (rcd != NULL) { + if (rcd) { if (!LNetHandleIsInvalid(rcd->rcd_mdh)) { rc = LNetMDUnlink(rcd->rcd_mdh); - LASSERT(rc == 0); + LASSERT(!rc); } lnet_destroy_rc_data(rcd); } @@ -936,7 +988,7 @@ lnet_ping_router_locked(lnet_peer_t *rtr) lnet_peer_addref_locked(rtr); - if (rtr->lp_ping_deadline != 0 && /* ping timed out? */ + if (rtr->lp_ping_deadline && /* ping timed out? */ cfs_time_after(now, rtr->lp_ping_deadline)) lnet_notify_locked(rtr, 1, 0, now); @@ -950,10 +1002,10 @@ lnet_ping_router_locked(lnet_peer_t *rtr) return; } - rcd = rtr->lp_rcd != NULL ? + rcd = rtr->lp_rcd ? 
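/*
 * Aside: the pacing rules applied by lnet_ping_router_locked() reduce
 * to two checks, sketched here with plain seconds instead of jiffies.
 * A router is pinged only when its check interval (live or dead,
 * depending on current belief about the router) has elapsed since the
 * last ping and no ping is still in flight; an outstanding ping past
 * its deadline marks the router down via lnet_notify_locked(). Types
 * and names below are simplified assumptions.
 */
#include <stdbool.h>
#include <time.h>

struct router_state {
	time_t ping_timestamp;	/* when the last ping was issued */
	time_t ping_deadline;	/* 0 while no ping is outstanding */
	int ping_in_flight;	/* lp_ping_notsent analogue */
};

static bool ping_timed_out(const struct router_state *rs, time_t now)
{
	return rs->ping_deadline && now > rs->ping_deadline;
}

static bool should_ping(const struct router_state *rs, int interval_secs,
			time_t now)
{
	return interval_secs > 0 && !rs->ping_in_flight &&
	       now > rs->ping_timestamp + interval_secs;
}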
rtr->lp_rcd : lnet_create_rc_data_locked(rtr); - if (rcd == NULL) + if (!rcd) return; secs = lnet_router_check_interval(rtr); @@ -964,7 +1016,7 @@ lnet_ping_router_locked(lnet_peer_t *rtr) rtr->lp_ping_deadline, rtr->lp_ping_notsent, rtr->lp_alive, rtr->lp_alive_count, rtr->lp_ping_timestamp); - if (secs != 0 && !rtr->lp_ping_notsent && + if (secs && !rtr->lp_ping_notsent && cfs_time_after(now, cfs_time_add(rtr->lp_ping_timestamp, cfs_time_seconds(secs)))) { int rc; @@ -972,7 +1024,7 @@ lnet_ping_router_locked(lnet_peer_t *rtr) lnet_handle_md_t mdh; id.nid = rtr->lp_nid; - id.pid = LUSTRE_SRV_LNET_PID; + id.pid = LNET_PID_LUSTRE; CDEBUG(D_NET, "Check: %s\n", libcfs_id2str(id)); rtr->lp_ping_notsent = 1; @@ -980,7 +1032,7 @@ lnet_ping_router_locked(lnet_peer_t *rtr) mdh = rcd->rcd_mdh; - if (rtr->lp_ping_deadline == 0) { + if (!rtr->lp_ping_deadline) { rtr->lp_ping_deadline = cfs_time_shift(router_ping_timeout); } @@ -991,7 +1043,7 @@ lnet_ping_router_locked(lnet_peer_t *rtr) LNET_PROTO_PING_MATCHBITS, 0); lnet_net_lock(rtr->lp_cpt); - if (rc != 0) + if (rc) rtr->lp_ping_notsent = 0; /* no event pending */ } @@ -1001,8 +1053,9 @@ lnet_ping_router_locked(lnet_peer_t *rtr) int lnet_router_checker_start(void) { + struct task_struct *task; int rc; - int eqsz; + int eqsz = 0; LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN); @@ -1012,39 +1065,33 @@ lnet_router_checker_start(void) return -EINVAL; } - if (!the_lnet.ln_routing && - live_router_check_interval <= 0 && - dead_router_check_interval <= 0) - return 0; - sema_init(&the_lnet.ln_rc_signal, 0); - /* EQ size doesn't matter; the callback is guaranteed to get every - * event */ - eqsz = 0; - rc = LNetEQAlloc(eqsz, lnet_router_checker_event, - &the_lnet.ln_rc_eqh); - if (rc != 0) { + + rc = LNetEQAlloc(0, lnet_router_checker_event, &the_lnet.ln_rc_eqh); + if (rc) { CERROR("Can't allocate EQ(%d): %d\n", eqsz, rc); return -ENOMEM; } the_lnet.ln_rc_state = LNET_RC_STATE_RUNNING; - rc = PTR_ERR(kthread_run(lnet_router_checker, - NULL, "router_checker")); - if (IS_ERR_VALUE(rc)) { + task = kthread_run(lnet_router_checker, NULL, "router_checker"); + if (IS_ERR(task)) { + rc = PTR_ERR(task); CERROR("Can't start router checker thread: %d\n", rc); /* block until event callback signals exit */ down(&the_lnet.ln_rc_signal); rc = LNetEQFree(the_lnet.ln_rc_eqh); - LASSERT(rc == 0); + LASSERT(!rc); the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN; return -ENOMEM; } if (check_routers_before_use) { - /* Note that a helpful side-effect of pinging all known routers + /* + * Note that a helpful side-effect of pinging all known routers * at startup is that it makes them drop stale connections they - * may have to a previous instance of me. */ + * may have to a previous instance of me. 
+ */ lnet_wait_known_routerstate(); } @@ -1061,13 +1108,15 @@ lnet_router_checker_stop(void) LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING); the_lnet.ln_rc_state = LNET_RC_STATE_STOPPING; + /* wakeup the RC thread if it's sleeping */ + wake_up(&the_lnet.ln_rc_waitq); /* block until event callback signals exit */ down(&the_lnet.ln_rc_signal); LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN); rc = LNetEQFree(the_lnet.ln_rc_eqh); - LASSERT(rc == 0); + LASSERT(!rc); } static void @@ -1091,13 +1140,13 @@ lnet_prune_rc_data(int wait_unlink) if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) { /* router checker is stopping, prune all */ list_for_each_entry(lp, &the_lnet.ln_routers, - lp_rtr_list) { - if (lp->lp_rcd == NULL) + lp_rtr_list) { + if (!lp->lp_rcd) continue; LASSERT(list_empty(&lp->lp_rcd->rcd_list)); list_add(&lp->lp_rcd->rcd_list, - &the_lnet.ln_rcd_deathrow); + &the_lnet.ln_rcd_deathrow); lp->lp_rcd = NULL; } } @@ -1119,7 +1168,7 @@ lnet_prune_rc_data(int wait_unlink) /* release all zombie RCDs */ while (!list_empty(&the_lnet.ln_rcd_zombie)) { list_for_each_entry_safe(rcd, tmp, &the_lnet.ln_rcd_zombie, - rcd_list) { + rcd_list) { if (LNetHandleIsInvalid(rcd->rcd_mdh)) list_move(&rcd->rcd_list, &head); } @@ -1131,7 +1180,7 @@ lnet_prune_rc_data(int wait_unlink) while (!list_empty(&head)) { rcd = list_entry(head.next, - lnet_rc_data_t, rcd_list); + lnet_rc_data_t, rcd_list); list_del_init(&rcd->rcd_list); lnet_destroy_rc_data(rcd); } @@ -1151,6 +1200,33 @@ lnet_prune_rc_data(int wait_unlink) lnet_net_unlock(LNET_LOCK_EX); } +/* + * This function is called to check if the RC should block indefinitely. + * It's called from lnet_router_checker() as well as being passed to + * wait_event_interruptible() to avoid the lost wake_up problem. + * + * When it's called from wait_event_interruptible() it is necessary to + * also not sleep if the rc state is not running to avoid a deadlock + * when the system is shutting down + */ +static inline bool +lnet_router_checker_active(void) +{ + if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) + return true; + + /* + * Router Checker thread needs to run when routing is enabled in + * order to call lnet_update_ni_status_locked() + */ + if (the_lnet.ln_routing) + return true; + + return !list_empty(&the_lnet.ln_routers) && + (live_router_check_interval > 0 || + dead_router_check_interval > 0); +} + static int lnet_router_checker(void *arg) { @@ -1159,8 +1235,6 @@ lnet_router_checker(void *arg) cfs_block_allsigs(); - LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING); - while (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING) { __u64 version; int cpt; @@ -1199,15 +1273,25 @@ rescan: lnet_prune_rc_data(0); /* don't wait for UNLINK */ - /* Call schedule_timeout() here always adds 1 to load average + /* + * Call schedule_timeout() here always adds 1 to load average * because kernel counts # active tasks as nr_running - * + nr_uninterruptible. */ - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); + * + nr_uninterruptible. + */ + /* + * if there are any routes then wakeup every second. 
If + * there are no routes then sleep indefinitely until woken + * up by a user adding a route + */ + if (!lnet_router_checker_active()) + wait_event_interruptible(the_lnet.ln_rc_waitq, + lnet_router_checker_active()); + else + wait_event_interruptible_timeout(the_lnet.ln_rc_waitq, + false, + cfs_time_seconds(1)); } - LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_STOPPING); - lnet_prune_rc_data(1); /* wait for UNLINK */ the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN; @@ -1216,7 +1300,7 @@ rescan: return 0; } -static void +void lnet_destroy_rtrbuf(lnet_rtrbuf_t *rb, int npages) { int sz = offsetof(lnet_rtrbuf_t, rb_kiov[npages]); @@ -1237,7 +1321,7 @@ lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp, int cpt) int i; LIBCFS_CPT_ALLOC(rb, lnet_cpt_table(), cpt, sz); - if (rb == NULL) + if (!rb) return NULL; rb->rb_pool = rbp; @@ -1246,7 +1330,7 @@ lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp, int cpt) page = alloc_pages_node( cfs_cpt_spread_node(lnet_cpt_table(), cpt), GFP_KERNEL | __GFP_ZERO, 0); - if (page == NULL) { + if (!page) { while (--i >= 0) __free_page(rb->rb_kiov[i].kiov_page); @@ -1254,7 +1338,7 @@ lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp, int cpt) return NULL; } - rb->rb_kiov[i].kiov_len = PAGE_CACHE_SIZE; + rb->rb_kiov[i].kiov_len = PAGE_SIZE; rb->rb_kiov[i].kiov_offset = 0; rb->rb_kiov[i].kiov_page = page; } @@ -1263,66 +1347,119 @@ lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp, int cpt) } static void -lnet_rtrpool_free_bufs(lnet_rtrbufpool_t *rbp) +lnet_rtrpool_free_bufs(lnet_rtrbufpool_t *rbp, int cpt) { int npages = rbp->rbp_npages; - int nbuffers = 0; + struct list_head tmp; lnet_rtrbuf_t *rb; + lnet_rtrbuf_t *temp; - if (rbp->rbp_nbuffers == 0) /* not initialized or already freed */ + if (!rbp->rbp_nbuffers) /* not initialized or already freed */ return; - LASSERT(list_empty(&rbp->rbp_msgs)); - LASSERT(rbp->rbp_credits == rbp->rbp_nbuffers); + INIT_LIST_HEAD(&tmp); - while (!list_empty(&rbp->rbp_bufs)) { - LASSERT(rbp->rbp_credits > 0); + lnet_net_lock(cpt); + lnet_drop_routed_msgs_locked(&rbp->rbp_msgs, cpt); + list_splice_init(&rbp->rbp_bufs, &tmp); + rbp->rbp_req_nbuffers = 0; + rbp->rbp_nbuffers = 0; + rbp->rbp_credits = 0; + rbp->rbp_mincredits = 0; + lnet_net_unlock(cpt); - rb = list_entry(rbp->rbp_bufs.next, - lnet_rtrbuf_t, rb_list); + /* Free buffers on the free list. */ + list_for_each_entry_safe(rb, temp, &tmp, rb_list) { list_del(&rb->rb_list); lnet_destroy_rtrbuf(rb, npages); - nbuffers++; } - - LASSERT(rbp->rbp_nbuffers == nbuffers); - LASSERT(rbp->rbp_credits == nbuffers); - - rbp->rbp_nbuffers = rbp->rbp_credits = 0; } static int -lnet_rtrpool_alloc_bufs(lnet_rtrbufpool_t *rbp, int nbufs, int cpt) +lnet_rtrpool_adjust_bufs(lnet_rtrbufpool_t *rbp, int nbufs, int cpt) { + struct list_head rb_list; lnet_rtrbuf_t *rb; - int i; + int num_rb; + int num_buffers = 0; + int old_req_nbufs; + int npages = rbp->rbp_npages; - if (rbp->rbp_nbuffers != 0) { - LASSERT(rbp->rbp_nbuffers == nbufs); + lnet_net_lock(cpt); + /* + * If we are called for less buffers than already in the pool, we + * just lower the req_nbuffers number and excess buffers will be + * thrown away as they are returned to the free list. Credits + * then get adjusted as well. + * If we already have enough buffers allocated to serve the + * increase requested, then we can treat that the same way as we + * do the decrease. 
+ */ + num_rb = nbufs - rbp->rbp_nbuffers; + if (nbufs <= rbp->rbp_req_nbuffers || num_rb <= 0) { + rbp->rbp_req_nbuffers = nbufs; + lnet_net_unlock(cpt); return 0; } + /* + * store the older value of rbp_req_nbuffers and then set it to + * the new request to prevent lnet_return_rx_credits_locked() from + * freeing buffers that we need to keep around + */ + old_req_nbufs = rbp->rbp_req_nbuffers; + rbp->rbp_req_nbuffers = nbufs; + lnet_net_unlock(cpt); - for (i = 0; i < nbufs; i++) { + INIT_LIST_HEAD(&rb_list); + + /* + * allocate the buffers on a local list first. If all buffers are + * allocated successfully then join this list to the rbp buffer + * list. If not then free all allocated buffers. + */ + while (num_rb-- > 0) { rb = lnet_new_rtrbuf(rbp, cpt); + if (!rb) { + CERROR("Failed to allocate %d route bufs of %d pages\n", + nbufs, npages); - if (rb == NULL) { - CERROR("Failed to allocate %d router bufs of %d pages\n", - nbufs, rbp->rbp_npages); - return -ENOMEM; - } + lnet_net_lock(cpt); + rbp->rbp_req_nbuffers = old_req_nbufs; + lnet_net_unlock(cpt); - rbp->rbp_nbuffers++; - rbp->rbp_credits++; - rbp->rbp_mincredits++; - list_add(&rb->rb_list, &rbp->rbp_bufs); + goto failed; + } - /* No allocation "under fire" */ - /* Otherwise we'd need code to schedule blocked msgs etc */ - LASSERT(!the_lnet.ln_routing); + list_add(&rb->rb_list, &rb_list); + num_buffers++; } - LASSERT(rbp->rbp_credits == nbufs); + lnet_net_lock(cpt); + + list_splice_tail(&rb_list, &rbp->rbp_bufs); + rbp->rbp_nbuffers += num_buffers; + rbp->rbp_credits += num_buffers; + rbp->rbp_mincredits = rbp->rbp_credits; + /* + * We need to schedule blocked msg using the newly + * added buffers. + */ + while (!list_empty(&rbp->rbp_bufs) && + !list_empty(&rbp->rbp_msgs)) + lnet_schedule_blocked_locked(rbp); + + lnet_net_unlock(cpt); + return 0; + +failed: + while (!list_empty(&rb_list)) { + rb = list_entry(rb_list.next, lnet_rtrbuf_t, rb_list); + list_del(&rb->rb_list); + lnet_destroy_rtrbuf(rb, npages); + } + + return -ENOMEM; } static void @@ -1337,26 +1474,28 @@ lnet_rtrpool_init(lnet_rtrbufpool_t *rbp, int npages) } void -lnet_rtrpools_free(void) +lnet_rtrpools_free(int keep_pools) { lnet_rtrbufpool_t *rtrp; int i; - if (the_lnet.ln_rtrpools == NULL) /* uninitialized or freed */ + if (!the_lnet.ln_rtrpools) /* uninitialized or freed */ return; cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) { - lnet_rtrpool_free_bufs(&rtrp[0]); - lnet_rtrpool_free_bufs(&rtrp[1]); - lnet_rtrpool_free_bufs(&rtrp[2]); + lnet_rtrpool_free_bufs(&rtrp[LNET_TINY_BUF_IDX], i); + lnet_rtrpool_free_bufs(&rtrp[LNET_SMALL_BUF_IDX], i); + lnet_rtrpool_free_bufs(&rtrp[LNET_LARGE_BUF_IDX], i); } - cfs_percpt_free(the_lnet.ln_rtrpools); - the_lnet.ln_rtrpools = NULL; + if (!keep_pools) { + cfs_percpt_free(the_lnet.ln_rtrpools); + the_lnet.ln_rtrpools = NULL; + } } static int -lnet_nrb_tiny_calculate(int npages) +lnet_nrb_tiny_calculate(void) { int nrbs = LNET_NRB_TINY; @@ -1364,7 +1503,7 @@ lnet_nrb_tiny_calculate(int npages) LCONSOLE_ERROR_MSG(0x10c, "tiny_router_buffers=%d invalid when routing enabled\n", tiny_router_buffers); - return -1; + return -EINVAL; } if (tiny_router_buffers > 0) @@ -1375,7 +1514,7 @@ lnet_nrb_tiny_calculate(int npages) } static int -lnet_nrb_small_calculate(int npages) +lnet_nrb_small_calculate(void) { int nrbs = LNET_NRB_SMALL; @@ -1383,7 +1522,7 @@ lnet_nrb_small_calculate(int npages) LCONSOLE_ERROR_MSG(0x10c, "small_router_buffers=%d invalid when routing enabled\n", small_router_buffers); - return -1; + return -EINVAL; } if 
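/*
 * Aside: lnet_rtrpool_adjust_bufs() above grows a pool with a common
 * kernel pattern: allocate on a private list without holding the pool
 * lock, splice into the shared pool only once every allocation has
 * succeeded, and free the private list wholesale on failure. Shrinking
 * is lazy by contrast: rbp_req_nbuffers is simply lowered and excess
 * buffers are dropped as they come back. The user-space rendering
 * below, with assumed simplified types, shows the all-or-nothing grow.
 */
#include <stdlib.h>

struct buf {
	struct buf *next;
};

struct pool {
	struct buf *bufs;	/* free list, lock-protected in real code */
	int nbuffers;
};

static int pool_grow(struct pool *p, int count, size_t size)
{
	struct buf *local = NULL;
	struct buf *b;
	int i;

	for (i = 0; i < count; i++) {
		b = malloc(sizeof(*b) + size);
		if (!b)
			goto failed;
		b->next = local;
		local = b;
	}

	/* all allocations succeeded: splice under the pool lock */
	while (local) {
		b = local;
		local = b->next;
		b->next = p->bufs;
		p->bufs = b;
		p->nbuffers++;
	}
	return 0;

failed:
	while (local) {		/* pool left exactly as it was */
		b = local;
		local = b->next;
		free(b);
	}
	return -1;		/* -ENOMEM in the kernel */
}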
(small_router_buffers > 0) @@ -1394,7 +1533,7 @@ lnet_nrb_small_calculate(int npages) } static int -lnet_nrb_large_calculate(int npages) +lnet_nrb_large_calculate(void) { int nrbs = LNET_NRB_LARGE; @@ -1402,7 +1541,7 @@ lnet_nrb_large_calculate(int npages) LCONSOLE_ERROR_MSG(0x10c, "large_router_buffers=%d invalid when routing enabled\n", large_router_buffers); - return -1; + return -EINVAL; } if (large_router_buffers > 0) @@ -1416,16 +1555,12 @@ int lnet_rtrpools_alloc(int im_a_router) { lnet_rtrbufpool_t *rtrp; - int large_pages; - int small_pages = 1; int nrb_tiny; int nrb_small; int nrb_large; int rc; int i; - large_pages = (LNET_MTU + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (!strcmp(forwarding, "")) { /* not set either way */ if (!im_a_router) @@ -1440,41 +1575,46 @@ lnet_rtrpools_alloc(int im_a_router) return -EINVAL; } - nrb_tiny = lnet_nrb_tiny_calculate(0); + nrb_tiny = lnet_nrb_tiny_calculate(); if (nrb_tiny < 0) return -EINVAL; - nrb_small = lnet_nrb_small_calculate(small_pages); + nrb_small = lnet_nrb_small_calculate(); if (nrb_small < 0) return -EINVAL; - nrb_large = lnet_nrb_large_calculate(large_pages); + nrb_large = lnet_nrb_large_calculate(); if (nrb_large < 0) return -EINVAL; the_lnet.ln_rtrpools = cfs_percpt_alloc(lnet_cpt_table(), LNET_NRBPOOLS * sizeof(lnet_rtrbufpool_t)); - if (the_lnet.ln_rtrpools == NULL) { + if (!the_lnet.ln_rtrpools) { LCONSOLE_ERROR_MSG(0x10c, "Failed to initialize router buffe pool\n"); return -ENOMEM; } cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) { - lnet_rtrpool_init(&rtrp[0], 0); - rc = lnet_rtrpool_alloc_bufs(&rtrp[0], nrb_tiny, i); - if (rc != 0) + lnet_rtrpool_init(&rtrp[LNET_TINY_BUF_IDX], 0); + rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_TINY_BUF_IDX], + nrb_tiny, i); + if (rc) goto failed; - lnet_rtrpool_init(&rtrp[1], small_pages); - rc = lnet_rtrpool_alloc_bufs(&rtrp[1], nrb_small, i); - if (rc != 0) + lnet_rtrpool_init(&rtrp[LNET_SMALL_BUF_IDX], + LNET_NRB_SMALL_PAGES); + rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_SMALL_BUF_IDX], + nrb_small, i); + if (rc) goto failed; - lnet_rtrpool_init(&rtrp[2], large_pages); - rc = lnet_rtrpool_alloc_bufs(&rtrp[2], nrb_large, i); - if (rc != 0) + lnet_rtrpool_init(&rtrp[LNET_LARGE_BUF_IDX], + LNET_NRB_LARGE_PAGES); + rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_LARGE_BUF_IDX], + nrb_large, i); + if (rc) goto failed; } @@ -1485,10 +1625,118 @@ lnet_rtrpools_alloc(int im_a_router) return 0; failed: - lnet_rtrpools_free(); + lnet_rtrpools_free(0); return rc; } +static int +lnet_rtrpools_adjust_helper(int tiny, int small, int large) +{ + int nrb = 0; + int rc = 0; + int i; + lnet_rtrbufpool_t *rtrp; + + /* + * If the provided values for each buffer pool are different than the + * configured values, we need to take action. 
+ */ + if (tiny >= 0) { + tiny_router_buffers = tiny; + nrb = lnet_nrb_tiny_calculate(); + cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) { + rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_TINY_BUF_IDX], + nrb, i); + if (rc) + return rc; + } + } + if (small >= 0) { + small_router_buffers = small; + nrb = lnet_nrb_small_calculate(); + cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) { + rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_SMALL_BUF_IDX], + nrb, i); + if (rc) + return rc; + } + } + if (large >= 0) { + large_router_buffers = large; + nrb = lnet_nrb_large_calculate(); + cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) { + rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_LARGE_BUF_IDX], + nrb, i); + if (rc) + return rc; + } + } + + return 0; +} + +int +lnet_rtrpools_adjust(int tiny, int small, int large) +{ + /* + * this function doesn't revert the changes if adding new buffers + * failed. It's up to the user space caller to revert the + * changes. + */ + if (!the_lnet.ln_routing) + return 0; + + return lnet_rtrpools_adjust_helper(tiny, small, large); +} + +int +lnet_rtrpools_enable(void) +{ + int rc; + + if (the_lnet.ln_routing) + return 0; + + if (!the_lnet.ln_rtrpools) + /* + * If routing is turned off, and we have never + * initialized the pools before, just call the + * standard buffer pool allocation routine as + * if we are just configuring this for the first + * time. + */ + return lnet_rtrpools_alloc(1); + + rc = lnet_rtrpools_adjust_helper(0, 0, 0); + if (rc) + return rc; + + lnet_net_lock(LNET_LOCK_EX); + the_lnet.ln_routing = 1; + + the_lnet.ln_ping_info->pi_features &= ~LNET_PING_FEAT_RTE_DISABLED; + lnet_net_unlock(LNET_LOCK_EX); + + return 0; +} + +void +lnet_rtrpools_disable(void) +{ + if (!the_lnet.ln_routing) + return; + + lnet_net_lock(LNET_LOCK_EX); + the_lnet.ln_routing = 0; + the_lnet.ln_ping_info->pi_features |= LNET_PING_FEAT_RTE_DISABLED; + + tiny_router_buffers = 0; + small_router_buffers = 0; + large_router_buffers = 0; + lnet_net_unlock(LNET_LOCK_EX); + lnet_rtrpools_free(1); +} + int lnet_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive, unsigned long when) { @@ -1499,28 +1747,28 @@ lnet_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive, unsigned long when) LASSERT(!in_interrupt()); CDEBUG(D_NET, "%s notifying %s: %s\n", - (ni == NULL) ? "userspace" : libcfs_nid2str(ni->ni_nid), - libcfs_nid2str(nid), - alive ? "up" : "down"); + !ni ? "userspace" : libcfs_nid2str(ni->ni_nid), + libcfs_nid2str(nid), + alive ? "up" : "down"); - if (ni != NULL && + if (ni && LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid)) { CWARN("Ignoring notification of %s %s by %s (different net)\n", - libcfs_nid2str(nid), alive ? "birth" : "death", - libcfs_nid2str(ni->ni_nid)); + libcfs_nid2str(nid), alive ? "birth" : "death", + libcfs_nid2str(ni->ni_nid)); return -EINVAL; } /* can't do predictions... */ if (cfs_time_after(when, now)) { CWARN("Ignoring prediction from %s of %s %s %ld seconds in the future\n", - (ni == NULL) ? "userspace" : libcfs_nid2str(ni->ni_nid), + !ni ? "userspace" : libcfs_nid2str(ni->ni_nid), libcfs_nid2str(nid), alive ? 
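/*
 * Aside: enabling and disabling routing above is advertised to peers
 * through a ping feature bit, so their router checkers can react
 * without any extra protocol. LNET_PING_FEAT_RTE_DISABLED is the
 * patch's flag; its bit position below is an assumption for
 * illustration, and pi_features stands in for the node's ping-info
 * feature word. A peer parsing a ping reply with the bit set marks
 * every route through this gateway down, as lnet_parse_rc_info()
 * does in the earlier hunk.
 */
#include <stdint.h>
#include <stdbool.h>

#define LNET_PING_FEAT_RTE_DISABLED	(1 << 2)	/* assumed value */

static uint32_t pi_features;

static void routing_enable(void)
{
	pi_features &= ~LNET_PING_FEAT_RTE_DISABLED;
}

static void routing_disable(void)
{
	pi_features |= LNET_PING_FEAT_RTE_DISABLED;
}

/* the check a peer's router checker effectively performs */
static bool gateway_routes_down(uint32_t feats)
{
	return feats & LNET_PING_FEAT_RTE_DISABLED;
}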
"up" : "down", cfs_duration_sec(cfs_time_sub(when, now))); return -EINVAL; } - if (ni != NULL && !alive && /* LND telling me she's down */ + if (ni && !alive && /* LND telling me she's down */ !auto_down) { /* auto-down disabled */ CDEBUG(D_NET, "Auto-down disabled\n"); return 0; @@ -1534,23 +1782,26 @@ lnet_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive, unsigned long when) } lp = lnet_find_peer_locked(the_lnet.ln_peer_tables[cpt], nid); - if (lp == NULL) { + if (!lp) { /* nid not found */ lnet_net_unlock(cpt); CDEBUG(D_NET, "%s not found\n", libcfs_nid2str(nid)); return 0; } - /* We can't fully trust LND on reporting exact peer last_alive + /* + * We can't fully trust LND on reporting exact peer last_alive * if he notifies us about dead peer. For example ksocklnd can * call us with when == _time_when_the_node_was_booted_ if - * no connections were successfully established */ - if (ni != NULL && !alive && when < lp->lp_last_alive) + * no connections were successfully established + */ + if (ni && !alive && when < lp->lp_last_alive) when = lp->lp_last_alive; - lnet_notify_locked(lp, ni == NULL, alive, when); + lnet_notify_locked(lp, !ni, alive, when); - lnet_ni_notify_locked(ni, lp); + if (ni) + lnet_ni_notify_locked(ni, lp); lnet_peer_decref_locked(lp); diff --git a/drivers/staging/lustre/lnet/lnet/router_proc.c b/drivers/staging/lustre/lnet/lnet/router_proc.c index 396c7c4e5..65f65a3fc 100644 --- a/drivers/staging/lustre/lnet/lnet/router_proc.c +++ b/drivers/staging/lustre/lnet/lnet/router_proc.c @@ -15,18 +15,16 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * */ #define DEBUG_SUBSYSTEM S_LNET #include "../../include/linux/libcfs/libcfs.h" #include "../../include/linux/lnet/lib-lnet.h" -/* This is really lnet_proc.c. You might need to update sanity test 215 - * if any file format is changed. */ +/* + * This is really lnet_proc.c. You might need to update sanity test 215 + * if any file format is changed. 
+ */ #define LNET_LOFFT_BITS (sizeof(loff_t) * 8) /* @@ -75,25 +73,6 @@ #define LNET_PROC_VERSION(v) ((unsigned int)((v) & LNET_PROC_VER_MASK)) -static int proc_call_handler(void *data, int write, loff_t *ppos, - void __user *buffer, size_t *lenp, - int (*handler)(void *data, int write, - loff_t pos, void __user *buffer, int len)) -{ - int rc = handler(data, write, *ppos, buffer, *lenp); - - if (rc < 0) - return rc; - - if (write) { - *ppos += *lenp; - } else { - *lenp = rc; - *ppos += rc; - } - return 0; -} - static int __proc_lnet_stats(void *data, int write, loff_t pos, void __user *buffer, int nob) { @@ -111,11 +90,11 @@ static int __proc_lnet_stats(void *data, int write, /* read */ LIBCFS_ALLOC(ctrs, sizeof(*ctrs)); - if (ctrs == NULL) + if (!ctrs) return -ENOMEM; LIBCFS_ALLOC(tmpstr, tmpsiz); - if (tmpstr == NULL) { + if (!tmpstr) { LIBCFS_FREE(ctrs, sizeof(*ctrs)); return -ENOMEM; } @@ -145,8 +124,8 @@ static int __proc_lnet_stats(void *data, int write, static int proc_lnet_stats(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return proc_call_handler(table->data, write, ppos, buffer, lenp, - __proc_lnet_stats); + return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, + __proc_lnet_stats); } static int proc_lnet_routes(struct ctl_table *table, int write, @@ -167,16 +146,16 @@ static int proc_lnet_routes(struct ctl_table *table, int write, LASSERT(!write); - if (*lenp == 0) + if (!*lenp) return 0; LIBCFS_ALLOC(tmpstr, tmpsiz); - if (tmpstr == NULL) + if (!tmpstr) return -ENOMEM; s = tmpstr; /* points to current position in tmpstr[] */ - if (*ppos == 0) { + if (!*ppos) { s += snprintf(s, tmpstr + tmpsiz - s, "Routing %s\n", the_lnet.ln_routing ? "enabled" : "disabled"); LASSERT(tmpstr + tmpsiz - s > 0); @@ -206,23 +185,22 @@ static int proc_lnet_routes(struct ctl_table *table, int write, return -ESTALE; } - for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE && route == NULL; - i++) { + for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE && !route; i++) { rn_list = &the_lnet.ln_remote_nets_hash[i]; n = rn_list->next; - while (n != rn_list && route == NULL) { + while (n != rn_list && !route) { rnet = list_entry(n, lnet_remotenet_t, - lrn_list); + lrn_list); r = rnet->lrn_routes.next; while (r != &rnet->lrn_routes) { lnet_route_t *re = list_entry(r, lnet_route_t, - lr_list); - if (skip == 0) { + lr_list); + if (!skip) { route = re; break; } @@ -235,12 +213,12 @@ static int proc_lnet_routes(struct ctl_table *table, int write, } } - if (route != NULL) { + if (route) { __u32 net = rnet->lrn_net; - unsigned int hops = route->lr_hops; + __u32 hops = route->lr_hops; unsigned int priority = route->lr_priority; lnet_nid_t nid = route->lr_gateway->lp_nid; - int alive = route->lr_gateway->lp_alive; + int alive = lnet_is_route_alive(route); s += snprintf(s, tmpstr + tmpsiz - s, "%-8s %4u %8u %7s %s\n", @@ -259,9 +237,9 @@ static int proc_lnet_routes(struct ctl_table *table, int write, if (len > *lenp) { /* linux-supplied buffer is too small */ rc = -EINVAL; } else if (len > 0) { /* wrote something */ - if (copy_to_user(buffer, tmpstr, len)) + if (copy_to_user(buffer, tmpstr, len)) { rc = -EFAULT; - else { + } else { off += 1; *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off); } @@ -269,7 +247,7 @@ static int proc_lnet_routes(struct ctl_table *table, int write, LIBCFS_FREE(tmpstr, tmpsiz); - if (rc == 0) + if (!rc) *lenp = len; return rc; @@ -291,16 +269,16 @@ static int proc_lnet_routers(struct ctl_table *table, int write, LASSERT(!write); - if (*lenp == 0) + if 
(!*lenp) return 0; LIBCFS_ALLOC(tmpstr, tmpsiz); - if (tmpstr == NULL) + if (!tmpstr) return -ENOMEM; s = tmpstr; /* points to current position in tmpstr[] */ - if (*ppos == 0) { + if (!*ppos) { s += snprintf(s, tmpstr + tmpsiz - s, "%-4s %7s %9s %6s %12s %9s %8s %7s %s\n", "ref", "rtr_ref", "alive_cnt", "state", @@ -330,9 +308,9 @@ static int proc_lnet_routers(struct ctl_table *table, int write, while (r != &the_lnet.ln_routers) { lnet_peer_t *lp = list_entry(r, lnet_peer_t, - lp_rtr_list); + lp_rtr_list); - if (skip == 0) { + if (!skip) { peer = lp; break; } @@ -341,7 +319,7 @@ static int proc_lnet_routers(struct ctl_table *table, int write, r = r->next; } - if (peer != NULL) { + if (peer) { lnet_nid_t nid = peer->lp_nid; unsigned long now = cfs_time_current(); unsigned long deadline = peer->lp_ping_deadline; @@ -356,19 +334,21 @@ static int proc_lnet_routers(struct ctl_table *table, int write, lnet_route_t *rtr; if ((peer->lp_ping_feats & - LNET_PING_FEAT_NI_STATUS) != 0) { + LNET_PING_FEAT_NI_STATUS)) { list_for_each_entry(rtr, &peer->lp_routes, - lr_gwlist) { - /* downis on any route should be the - * number of downis on the gateway */ - if (rtr->lr_downis != 0) { + lr_gwlist) { + /* + * downis on any route should be the + * number of downis on the gateway + */ + if (rtr->lr_downis) { down_ni = rtr->lr_downis; break; } } } - if (deadline == 0) + if (!deadline) s += snprintf(s, tmpstr + tmpsiz - s, "%-4d %7d %9d %6s %12d %9d %8s %7d %s\n", nrefs, nrtrrefs, alive_cnt, @@ -394,9 +374,9 @@ static int proc_lnet_routers(struct ctl_table *table, int write, if (len > *lenp) { /* linux-supplied buffer is too small */ rc = -EINVAL; } else if (len > 0) { /* wrote something */ - if (copy_to_user(buffer, tmpstr, len)) + if (copy_to_user(buffer, tmpstr, len)) { rc = -EFAULT; - else { + } else { off += 1; *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off); } @@ -404,7 +384,7 @@ static int proc_lnet_routers(struct ctl_table *table, int write, LIBCFS_FREE(tmpstr, tmpsiz); - if (rc == 0) + if (!rc) *lenp = len; return rc; @@ -427,7 +407,7 @@ static int proc_lnet_peers(struct ctl_table *table, int write, CLASSERT(LNET_PROC_HASH_BITS >= LNET_PEER_HASH_BITS); LASSERT(!write); - if (*lenp == 0) + if (!*lenp) return 0; if (cpt >= LNET_CPT_NUMBER) { @@ -436,12 +416,12 @@ static int proc_lnet_peers(struct ctl_table *table, int write, } LIBCFS_ALLOC(tmpstr, tmpsiz); - if (tmpstr == NULL) + if (!tmpstr) return -ENOMEM; s = tmpstr; /* points to current position in tmpstr[] */ - if (*ppos == 0) { + if (!*ppos) { s += snprintf(s, tmpstr + tmpsiz - s, "%-24s %4s %5s %5s %5s %5s %5s %5s %5s %s\n", "nid", "refs", "state", "last", "max", @@ -470,18 +450,20 @@ static int proc_lnet_peers(struct ctl_table *table, int write, } while (hash < LNET_PEER_HASH_SIZE) { - if (p == NULL) + if (!p) p = ptable->pt_hash[hash].next; while (p != &ptable->pt_hash[hash]) { lnet_peer_t *lp = list_entry(p, lnet_peer_t, - lp_hashlist); - if (skip == 0) { + lp_hashlist); + if (!skip) { peer = lp; - /* minor optimization: start from idx+1 + /* + * minor optimization: start from idx+1 * on next iteration if we've just - * drained lp_hashlist */ + * drained lp_hashlist + */ if (lp->lp_hashlist.next == &ptable->pt_hash[hash]) { hoff = 1; @@ -497,7 +479,7 @@ static int proc_lnet_peers(struct ctl_table *table, int write, p = lp->lp_hashlist.next; } - if (peer != NULL) + if (peer) break; p = NULL; @@ -505,7 +487,7 @@ static int proc_lnet_peers(struct ctl_table *table, int write, hash++; } - if (peer != NULL) { + if (peer) { lnet_nid_t nid = peer->lp_nid; 
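/*
 * Aside: these /proc handlers keep no state between read() calls, so
 * the whole iterator (cpt, table version, hash bucket, offset) is
 * packed into the loff_t file position via LNET_PROC_POS_MAKE and
 * unpacked on re-entry; if the stored version no longer matches the
 * live table version, the walk returns -ESTALE. A sketch of that
 * encoding with assumed field widths (the kernel derives the real
 * ones from LNET_PROC_*_BITS and LNET_LOFFT_BITS):
 */
#include <stdint.h>

#define POS_OFF_BITS	31	/* in-bucket offset */
#define POS_HASH_BITS	9	/* hash bucket */
#define POS_VER_BITS	16	/* table version */
				/* remaining high bits hold the cpt */

static uint64_t pos_make(unsigned int cpt, unsigned int ver,
			 unsigned int hash, unsigned int off)
{
	return ((uint64_t)cpt << (POS_OFF_BITS + POS_HASH_BITS +
				  POS_VER_BITS)) |
	       ((uint64_t)ver << (POS_OFF_BITS + POS_HASH_BITS)) |
	       ((uint64_t)hash << POS_OFF_BITS) |
	       off;
}

static unsigned int pos_version(uint64_t pos)
{
	return (pos >> (POS_OFF_BITS + POS_HASH_BITS)) &
	       ((1u << POS_VER_BITS) - 1);
}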
int nrefs = peer->lp_refcount; int lastalive = -1; @@ -553,7 +535,7 @@ static int proc_lnet_peers(struct ctl_table *table, int write, cpt++; hash = 0; hoff = 1; - if (peer == NULL && cpt < LNET_CPT_NUMBER) + if (!peer && cpt < LNET_CPT_NUMBER) goto again; } } @@ -571,7 +553,7 @@ static int proc_lnet_peers(struct ctl_table *table, int write, LIBCFS_FREE(tmpstr, tmpsiz); - if (rc == 0) + if (!rc) *lenp = len; return rc; @@ -593,7 +575,7 @@ static int __proc_lnet_buffers(void *data, int write, /* (4 %d) * 4 * LNET_CPT_NUMBER */ tmpsiz = 64 * (LNET_NRBPOOLS + 1) * LNET_CPT_NUMBER; LIBCFS_ALLOC(tmpstr, tmpsiz); - if (tmpstr == NULL) + if (!tmpstr) return -ENOMEM; s = tmpstr; /* points to current position in tmpstr[] */ @@ -603,7 +585,7 @@ static int __proc_lnet_buffers(void *data, int write, "pages", "count", "credits", "min"); LASSERT(tmpstr + tmpsiz - s > 0); - if (the_lnet.ln_rtrpools == NULL) + if (!the_lnet.ln_rtrpools) goto out; /* I'm not a router */ for (idx = 0; idx < LNET_NRBPOOLS; idx++) { @@ -638,8 +620,8 @@ static int __proc_lnet_buffers(void *data, int write, static int proc_lnet_buffers(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return proc_call_handler(table->data, write, ppos, buffer, lenp, - __proc_lnet_buffers); + return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, + __proc_lnet_buffers); } static int proc_lnet_nis(struct ctl_table *table, int write, @@ -653,16 +635,16 @@ static int proc_lnet_nis(struct ctl_table *table, int write, LASSERT(!write); - if (*lenp == 0) + if (!*lenp) return 0; LIBCFS_ALLOC(tmpstr, tmpsiz); - if (tmpstr == NULL) + if (!tmpstr) return -ENOMEM; s = tmpstr; /* points to current position in tmpstr[] */ - if (*ppos == 0) { + if (!*ppos) { s += snprintf(s, tmpstr + tmpsiz - s, "%-24s %6s %5s %4s %4s %4s %5s %5s %5s\n", "nid", "status", "alive", "refs", "peer", @@ -680,7 +662,7 @@ static int proc_lnet_nis(struct ctl_table *table, int write, while (n != &the_lnet.ln_nis) { lnet_ni_t *a_ni = list_entry(n, lnet_ni_t, ni_list); - if (skip == 0) { + if (!skip) { ni = a_ni; break; } @@ -689,7 +671,7 @@ static int proc_lnet_nis(struct ctl_table *table, int write, n = n->next; } - if (ni != NULL) { + if (ni) { struct lnet_tx_queue *tq; char *stat; time64_t now = ktime_get_real_seconds(); @@ -705,15 +687,17 @@ static int proc_lnet_nis(struct ctl_table *table, int write, last_alive = 0; lnet_ni_lock(ni); - LASSERT(ni->ni_status != NULL); + LASSERT(ni->ni_status); stat = (ni->ni_status->ns_status == LNET_NI_STATUS_UP) ? 
"up" : "down"; lnet_ni_unlock(ni); - /* we actually output credits information for - * TX queue of each partition */ + /* + * we actually output credits information for + * TX queue of each partition + */ cfs_percpt_for_each(tq, i, ni->ni_tx_queues) { - for (j = 0; ni->ni_cpts != NULL && + for (j = 0; ni->ni_cpts && j < ni->ni_ncpts; j++) { if (i == ni->ni_cpts[j]) break; @@ -722,18 +706,19 @@ static int proc_lnet_nis(struct ctl_table *table, int write, if (j == ni->ni_ncpts) continue; - if (i != 0) + if (i) lnet_net_lock(i); s += snprintf(s, tmpstr + tmpsiz - s, - "%-24s %6s %5d %4d %4d %4d %5d %5d %5d\n", - libcfs_nid2str(ni->ni_nid), stat, - last_alive, *ni->ni_refs[i], - ni->ni_peertxcredits, - ni->ni_peerrtrcredits, - tq->tq_credits_max, - tq->tq_credits, tq->tq_credits_min); - if (i != 0) + "%-24s %6s %5d %4d %4d %4d %5d %5d %5d\n", + libcfs_nid2str(ni->ni_nid), stat, + last_alive, *ni->ni_refs[i], + ni->ni_peertxcredits, + ni->ni_peerrtrcredits, + tq->tq_credits_max, + tq->tq_credits, + tq->tq_credits_min); + if (i) lnet_net_unlock(i); } LASSERT(tmpstr + tmpsiz - s > 0); @@ -755,7 +740,7 @@ static int proc_lnet_nis(struct ctl_table *table, int write, LIBCFS_FREE(tmpstr, tmpsiz); - if (rc == 0) + if (!rc) *lenp = len; return rc; @@ -795,8 +780,6 @@ static struct lnet_portal_rotors portal_rotors[] = { }, }; -extern int portal_rotor; - static int __proc_lnet_portal_rotor(void *data, int write, loff_t pos, void __user *buffer, int nob) { @@ -807,7 +790,7 @@ static int __proc_lnet_portal_rotor(void *data, int write, int i; LIBCFS_ALLOC(buf, buf_len); - if (buf == NULL) + if (!buf) return -ENOMEM; if (!write) { @@ -831,7 +814,7 @@ static int __proc_lnet_portal_rotor(void *data, int write, rc = 0; } else { rc = cfs_trace_copyout_string(buffer, nob, - buf + pos, "\n"); + buf + pos, "\n"); } goto out; } @@ -844,9 +827,9 @@ static int __proc_lnet_portal_rotor(void *data, int write, rc = -EINVAL; lnet_res_lock(0); - for (i = 0; portal_rotors[i].pr_name != NULL; i++) { - if (strncasecmp(portal_rotors[i].pr_name, tmp, - strlen(portal_rotors[i].pr_name)) == 0) { + for (i = 0; portal_rotors[i].pr_name; i++) { + if (!strncasecmp(portal_rotors[i].pr_name, tmp, + strlen(portal_rotors[i].pr_name))) { portal_rotor = portal_rotors[i].pr_value; rc = 0; break; @@ -862,8 +845,8 @@ static int proc_lnet_portal_rotor(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return proc_call_handler(table->data, write, ppos, buffer, lenp, - __proc_lnet_portal_rotor); + return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, + __proc_lnet_portal_rotor); } static struct ctl_table lnet_table[] = { diff --git a/drivers/staging/lustre/lnet/selftest/brw_test.c b/drivers/staging/lustre/lnet/selftest/brw_test.c index 1f04cc1fc..dcb6e506f 100644 --- a/drivers/staging/lustre/lnet/selftest/brw_test.c +++ b/drivers/staging/lustre/lnet/selftest/brw_test.c @@ -51,14 +51,14 @@ MODULE_PARM_DESC(brw_inject_errors, "# data errors to inject randomly, zero by d static void brw_client_fini(sfw_test_instance_t *tsi) { - srpc_bulk_t *bulk; - sfw_test_unit_t *tsu; + srpc_bulk_t *bulk; + sfw_test_unit_t *tsu; LASSERT(tsi->tsi_is_client); list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) { bulk = tsu->tsu_private; - if (bulk == NULL) + if (!bulk) continue; srpc_free_bulk(bulk); @@ -69,38 +69,42 @@ brw_client_fini(sfw_test_instance_t *tsi) static int brw_client_init(sfw_test_instance_t *tsi) { - sfw_session_t *sn = tsi->tsi_batch->bat_session; - int flags; - int npg; - int len; - int opc; - 
srpc_bulk_t *bulk; - sfw_test_unit_t *tsu; - - LASSERT(sn != NULL); + sfw_session_t *sn = tsi->tsi_batch->bat_session; + int flags; + int npg; + int len; + int opc; + srpc_bulk_t *bulk; + sfw_test_unit_t *tsu; + + LASSERT(sn); LASSERT(tsi->tsi_is_client); - if ((sn->sn_features & LST_FEAT_BULK_LEN) == 0) { - test_bulk_req_t *breq = &tsi->tsi_u.bulk_v0; + if (!(sn->sn_features & LST_FEAT_BULK_LEN)) { + test_bulk_req_t *breq = &tsi->tsi_u.bulk_v0; - opc = breq->blk_opc; + opc = breq->blk_opc; flags = breq->blk_flags; - npg = breq->blk_npg; - /* NB: this is not going to work for variable page size, - * but we have to keep it for compatibility */ - len = npg * PAGE_CACHE_SIZE; + npg = breq->blk_npg; + /* + * NB: this is not going to work for variable page size, + * but we have to keep it for compatibility + */ + len = npg * PAGE_SIZE; } else { test_bulk_req_v1_t *breq = &tsi->tsi_u.bulk_v1; - /* I should never get this step if it's unknown feature - * because make_session will reject unknown feature */ - LASSERT((sn->sn_features & ~LST_FEATS_MASK) == 0); + /* + * I should never get this step if it's unknown feature + * because make_session will reject unknown feature + */ + LASSERT(!(sn->sn_features & ~LST_FEATS_MASK)); - opc = breq->blk_opc; + opc = breq->blk_opc; flags = breq->blk_flags; - len = breq->blk_len; - npg = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + len = breq->blk_len; + npg = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; } if (npg > LNET_MAX_IOV || npg <= 0) @@ -116,7 +120,7 @@ brw_client_init(sfw_test_instance_t *tsi) list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) { bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid), npg, len, opc == LST_BRW_READ); - if (bulk == NULL) { + if (!bulk) { brw_client_fini(tsi); return -ENOMEM; } @@ -127,9 +131,9 @@ brw_client_init(sfw_test_instance_t *tsi) return 0; } -#define BRW_POISON 0xbeefbeefbeefbeefULL -#define BRW_MAGIC 0xeeb0eeb1eeb2eeb3ULL -#define BRW_MSIZE sizeof(__u64) +#define BRW_POISON 0xbeefbeefbeefbeefULL +#define BRW_MAGIC 0xeeb0eeb1eeb2eeb3ULL +#define BRW_MSIZE sizeof(__u64) static int brw_inject_one_error(void) @@ -141,7 +145,7 @@ brw_inject_one_error(void) ktime_get_ts64(&ts); - if (((ts.tv_nsec / NSEC_PER_USEC) & 1) == 0) + if (!((ts.tv_nsec / NSEC_PER_USEC) & 1)) return 0; return brw_inject_errors--; @@ -151,9 +155,9 @@ static void brw_fill_page(struct page *pg, int pattern, __u64 magic) { char *addr = page_address(pg); - int i; + int i; - LASSERT(addr != NULL); + LASSERT(addr); if (pattern == LST_BRW_CHECK_NONE) return; @@ -163,13 +167,13 @@ brw_fill_page(struct page *pg, int pattern, __u64 magic) if (pattern == LST_BRW_CHECK_SIMPLE) { memcpy(addr, &magic, BRW_MSIZE); - addr += PAGE_CACHE_SIZE - BRW_MSIZE; + addr += PAGE_SIZE - BRW_MSIZE; memcpy(addr, &magic, BRW_MSIZE); return; } if (pattern == LST_BRW_CHECK_FULL) { - for (i = 0; i < PAGE_CACHE_SIZE / BRW_MSIZE; i++) + for (i = 0; i < PAGE_SIZE / BRW_MSIZE; i++) memcpy(addr + i * BRW_MSIZE, &magic, BRW_MSIZE); return; } @@ -180,22 +184,22 @@ brw_fill_page(struct page *pg, int pattern, __u64 magic) static int brw_check_page(struct page *pg, int pattern, __u64 magic) { - char *addr = page_address(pg); - __u64 data = 0; /* make compiler happy */ - int i; + char *addr = page_address(pg); + __u64 data = 0; /* make compiler happy */ + int i; - LASSERT(addr != NULL); + LASSERT(addr); if (pattern == LST_BRW_CHECK_NONE) return 0; if (pattern == LST_BRW_CHECK_SIMPLE) { - data = *((__u64 *) addr); + data = *((__u64 *)addr); if (data != magic) goto bad_data; - addr += 
PAGE_CACHE_SIZE - BRW_MSIZE; - data = *((__u64 *) addr); + addr += PAGE_SIZE - BRW_MSIZE; + data = *((__u64 *)addr); if (data != magic) goto bad_data; @@ -203,8 +207,8 @@ brw_check_page(struct page *pg, int pattern, __u64 magic) } if (pattern == LST_BRW_CHECK_FULL) { - for (i = 0; i < PAGE_CACHE_SIZE / BRW_MSIZE; i++) { - data = *(((__u64 *) addr) + i); + for (i = 0; i < PAGE_SIZE / BRW_MSIZE; i++) { + data = *(((__u64 *)addr) + i); if (data != magic) goto bad_data; } @@ -216,7 +220,7 @@ brw_check_page(struct page *pg, int pattern, __u64 magic) bad_data: CERROR("Bad data in page %p: %#llx, %#llx expected\n", - pg, data, magic); + pg, data, magic); return 1; } @@ -240,9 +244,9 @@ brw_check_bulk(srpc_bulk_t *bk, int pattern, __u64 magic) for (i = 0; i < bk->bk_niov; i++) { pg = bk->bk_iovs[i].kiov_page; - if (brw_check_page(pg, pattern, magic) != 0) { + if (brw_check_page(pg, pattern, magic)) { CERROR("Bulk page %p (%d/%d) is corrupted!\n", - pg, i, bk->bk_niov); + pg, i, bk->bk_niov); return 1; } } @@ -252,7 +256,7 @@ brw_check_bulk(srpc_bulk_t *bk, int pattern, __u64 magic) static int brw_client_prep_rpc(sfw_test_unit_t *tsu, - lnet_process_id_t dest, srpc_client_rpc_t **rpcpp) + lnet_process_id_t dest, srpc_client_rpc_t **rpcpp) { srpc_bulk_t *bulk = tsu->tsu_private; sfw_test_instance_t *tsi = tsu->tsu_instance; @@ -265,32 +269,34 @@ brw_client_prep_rpc(sfw_test_unit_t *tsu, int opc; int rc; - LASSERT(sn != NULL); - LASSERT(bulk != NULL); + LASSERT(sn); + LASSERT(bulk); - if ((sn->sn_features & LST_FEAT_BULK_LEN) == 0) { + if (!(sn->sn_features & LST_FEAT_BULK_LEN)) { test_bulk_req_t *breq = &tsi->tsi_u.bulk_v0; - opc = breq->blk_opc; + opc = breq->blk_opc; flags = breq->blk_flags; - npg = breq->blk_npg; - len = npg * PAGE_CACHE_SIZE; + npg = breq->blk_npg; + len = npg * PAGE_SIZE; } else { test_bulk_req_v1_t *breq = &tsi->tsi_u.bulk_v1; - /* I should never get this step if it's unknown feature - * because make_session will reject unknown feature */ - LASSERT((sn->sn_features & ~LST_FEATS_MASK) == 0); + /* + * I should never get this step if it's unknown feature + * because make_session will reject unknown feature + */ + LASSERT(!(sn->sn_features & ~LST_FEATS_MASK)); - opc = breq->blk_opc; + opc = breq->blk_opc; flags = breq->blk_flags; - len = breq->blk_len; - npg = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + len = breq->blk_len; + npg = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; } rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, npg, len, &rpc); - if (rc != 0) + if (rc) return rc; memcpy(&rpc->crpc_bulk, bulk, offsetof(srpc_bulk_t, bk_iovs[npg])); @@ -301,8 +307,8 @@ brw_client_prep_rpc(sfw_test_unit_t *tsu, req = &rpc->crpc_reqstmsg.msg_body.brw_reqst; req->brw_flags = flags; - req->brw_rw = opc; - req->brw_len = len; + req->brw_rw = opc; + req->brw_len = len; *rpcpp = rpc; return 0; @@ -318,14 +324,14 @@ brw_client_done_rpc(sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc) srpc_brw_reply_t *reply = &msg->msg_body.brw_reply; srpc_brw_reqst_t *reqst = &rpc->crpc_reqstmsg.msg_body.brw_reqst; - LASSERT(sn != NULL); + LASSERT(sn); - if (rpc->crpc_status != 0) { + if (rpc->crpc_status) { CERROR("BRW RPC to %s failed with %d\n", - libcfs_id2str(rpc->crpc_dest), rpc->crpc_status); + libcfs_id2str(rpc->crpc_dest), rpc->crpc_status); if (!tsi->tsi_stopping) /* rpc could have been aborted */ atomic_inc(&sn->sn_brw_errors); - goto out; + return; } if (msg->msg_magic != SRPC_MSG_MAGIC) { @@ -334,27 +340,24 @@ brw_client_done_rpc(sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc) } 
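/*
 * Editor's illustrative sketch, not part of this patch: the bulk-verify
 * logic used by brw_check_bulk()/brw_check_page() above. With
 * LST_BRW_CHECK_SIMPLE the sender stamps the 8-byte BRW_MAGIC into the
 * first and last BRW_MSIZE bytes of each page (LST_BRW_CHECK_FULL stamps
 * every 8 bytes instead), and the receiver re-reads the stamps to detect
 * corruption in flight. A minimal stand-alone analogue, assuming a
 * 4096-byte page; all sketch_* names are hypothetical:
 */
#include <stdint.h>
#include <string.h>

#define SKETCH_PAGE_SIZE 4096
#define SKETCH_MSIZE sizeof(uint64_t)

/* stamp the magic at both ends of one page, as LST_BRW_CHECK_SIMPLE does */
static void sketch_fill_simple(char *page, uint64_t magic)
{
        memcpy(page, &magic, SKETCH_MSIZE);
        memcpy(page + SKETCH_PAGE_SIZE - SKETCH_MSIZE, &magic, SKETCH_MSIZE);
}

/* return 0 if both stamps survived the transfer, 1 on corruption */
static int sketch_check_simple(const char *page, uint64_t magic)
{
        uint64_t head, tail;

        memcpy(&head, page, SKETCH_MSIZE);
        memcpy(&tail, page + SKETCH_PAGE_SIZE - SKETCH_MSIZE, SKETCH_MSIZE);
        return (head == magic && tail == magic) ? 0 : 1;
}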
CDEBUG(reply->brw_status ? D_WARNING : D_NET, - "BRW RPC to %s finished with brw_status: %d\n", - libcfs_id2str(rpc->crpc_dest), reply->brw_status); + "BRW RPC to %s finished with brw_status: %d\n", + libcfs_id2str(rpc->crpc_dest), reply->brw_status); - if (reply->brw_status != 0) { + if (reply->brw_status) { atomic_inc(&sn->sn_brw_errors); rpc->crpc_status = -(int)reply->brw_status; - goto out; + return; } if (reqst->brw_rw == LST_BRW_WRITE) - goto out; + return; - if (brw_check_bulk(&rpc->crpc_bulk, reqst->brw_flags, magic) != 0) { + if (brw_check_bulk(&rpc->crpc_bulk, reqst->brw_flags, magic)) { CERROR("Bulk data from %s is corrupted!\n", - libcfs_id2str(rpc->crpc_dest)); + libcfs_id2str(rpc->crpc_dest)); atomic_inc(&sn->sn_brw_errors); rpc->crpc_status = -EBADMSG; } - -out: - return; } static void @@ -362,17 +365,17 @@ brw_server_rpc_done(struct srpc_server_rpc *rpc) { srpc_bulk_t *blk = rpc->srpc_bulk; - if (blk == NULL) + if (!blk) return; - if (rpc->srpc_status != 0) + if (rpc->srpc_status) CERROR("Bulk transfer %s %s has failed: %d\n", - blk->bk_sink ? "from" : "to", - libcfs_id2str(rpc->srpc_peer), rpc->srpc_status); + blk->bk_sink ? "from" : "to", + libcfs_id2str(rpc->srpc_peer), rpc->srpc_status); else CDEBUG(D_NET, "Transferred %d pages bulk data %s %s\n", - blk->bk_niov, blk->bk_sink ? "from" : "to", - libcfs_id2str(rpc->srpc_peer)); + blk->bk_niov, blk->bk_sink ? "from" : "to", + libcfs_id2str(rpc->srpc_peer)); sfw_free_pages(rpc); } @@ -385,16 +388,16 @@ brw_bulk_ready(struct srpc_server_rpc *rpc, int status) srpc_brw_reqst_t *reqst; srpc_msg_t *reqstmsg; - LASSERT(rpc->srpc_bulk != NULL); - LASSERT(rpc->srpc_reqstbuf != NULL); + LASSERT(rpc->srpc_bulk); + LASSERT(rpc->srpc_reqstbuf); reqstmsg = &rpc->srpc_reqstbuf->buf_msg; reqst = &reqstmsg->msg_body.brw_reqst; - if (status != 0) { + if (status) { CERROR("BRW bulk %s failed for RPC from %s: %d\n", - reqst->brw_rw == LST_BRW_READ ? "READ" : "WRITE", - libcfs_id2str(rpc->srpc_peer), status); + reqst->brw_rw == LST_BRW_READ ? 
"READ" : "WRITE", + libcfs_id2str(rpc->srpc_peer), status); return -EIO; } @@ -404,9 +407,9 @@ brw_bulk_ready(struct srpc_server_rpc *rpc, int status) if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) __swab64s(&magic); - if (brw_check_bulk(rpc->srpc_bulk, reqst->brw_flags, magic) != 0) { + if (brw_check_bulk(rpc->srpc_bulk, reqst->brw_flags, magic)) { CERROR("Bulk data from %s is corrupted!\n", - libcfs_id2str(rpc->srpc_peer)); + libcfs_id2str(rpc->srpc_peer)); reply->brw_status = EBADMSG; } @@ -448,27 +451,27 @@ brw_server_handle(struct srpc_server_rpc *rpc) return 0; } - if ((reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) != 0) { + if (reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) { replymsg->msg_ses_feats = LST_FEATS_MASK; reply->brw_status = EPROTO; return 0; } - if ((reqstmsg->msg_ses_feats & LST_FEAT_BULK_LEN) == 0) { + if (!(reqstmsg->msg_ses_feats & LST_FEAT_BULK_LEN)) { /* compat with old version */ - if ((reqst->brw_len & ~CFS_PAGE_MASK) != 0) { + if (reqst->brw_len & ~CFS_PAGE_MASK) { reply->brw_status = EINVAL; return 0; } - npg = reqst->brw_len >> PAGE_CACHE_SHIFT; + npg = reqst->brw_len >> PAGE_SHIFT; } else { - npg = (reqst->brw_len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + npg = (reqst->brw_len + PAGE_SIZE - 1) >> PAGE_SHIFT; } replymsg->msg_ses_feats = reqstmsg->msg_ses_feats; - if (reqst->brw_len == 0 || npg > LNET_MAX_IOV) { + if (!reqst->brw_len || npg > LNET_MAX_IOV) { reply->brw_status = EINVAL; return 0; } @@ -476,7 +479,7 @@ brw_server_handle(struct srpc_server_rpc *rpc) rc = sfw_alloc_pages(rpc, rpc->srpc_scd->scd_cpt, npg, reqst->brw_len, reqst->brw_rw == LST_BRW_WRITE); - if (rc != 0) + if (rc) return rc; if (reqst->brw_rw == LST_BRW_READ) @@ -490,8 +493,8 @@ brw_server_handle(struct srpc_server_rpc *rpc) sfw_test_client_ops_t brw_test_client; void brw_init_test_client(void) { - brw_test_client.tso_init = brw_client_init; - brw_test_client.tso_fini = brw_client_fini; + brw_test_client.tso_init = brw_client_init; + brw_test_client.tso_fini = brw_client_fini; brw_test_client.tso_prep_rpc = brw_client_prep_rpc; brw_test_client.tso_done_rpc = brw_client_done_rpc; }; @@ -499,10 +502,9 @@ void brw_init_test_client(void) srpc_service_t brw_test_service; void brw_init_test_service(void) { - - brw_test_service.sv_id = SRPC_SERVICE_BRW; - brw_test_service.sv_name = "brw_test"; - brw_test_service.sv_handler = brw_server_handle; + brw_test_service.sv_id = SRPC_SERVICE_BRW; + brw_test_service.sv_name = "brw_test"; + brw_test_service.sv_handler = brw_server_handle; brw_test_service.sv_bulk_ready = brw_bulk_ready; - brw_test_service.sv_wi_total = brw_srv_workitems; + brw_test_service.sv_wi_total = brw_srv_workitems; } diff --git a/drivers/staging/lustre/lnet/selftest/conctl.c b/drivers/staging/lustre/lnet/selftest/conctl.c index a53466540..79ee6c0bf 100644 --- a/drivers/staging/lustre/lnet/selftest/conctl.c +++ b/drivers/staging/lustre/lnet/selftest/conctl.c @@ -51,20 +51,19 @@ lst_session_new_ioctl(lstio_session_new_args_t *args) char *name; int rc; - if (args->lstio_ses_idp == NULL || /* address for output sid */ - args->lstio_ses_key == 0 || /* no key is specified */ - args->lstio_ses_namep == NULL || /* session name */ + if (!args->lstio_ses_idp || /* address for output sid */ + !args->lstio_ses_key || /* no key is specified */ + !args->lstio_ses_namep || /* session name */ args->lstio_ses_nmlen <= 0 || args->lstio_ses_nmlen > LST_NAME_SIZE) return -EINVAL; LIBCFS_ALLOC(name, args->lstio_ses_nmlen + 1); - if (name == NULL) + if (!name) return -ENOMEM; - if (copy_from_user(name, - 
args->lstio_ses_namep, - args->lstio_ses_nmlen)) { + if (copy_from_user(name, args->lstio_ses_namep, + args->lstio_ses_nmlen)) { LIBCFS_FREE(name, args->lstio_ses_nmlen + 1); return -EFAULT; } @@ -96,12 +95,12 @@ lst_session_info_ioctl(lstio_session_info_args_t *args) { /* no checking of key */ - if (args->lstio_ses_idp == NULL || /* address for output sid */ - args->lstio_ses_keyp == NULL || /* address for output key */ - args->lstio_ses_featp == NULL || /* address for output features */ - args->lstio_ses_ndinfo == NULL || /* address for output ndinfo */ - args->lstio_ses_namep == NULL || /* address for output name */ - args->lstio_ses_nmlen <= 0 || + if (!args->lstio_ses_idp || /* address for output sid */ + !args->lstio_ses_keyp || /* address for output key */ + !args->lstio_ses_featp || /* address for output features */ + !args->lstio_ses_ndinfo || /* address for output ndinfo */ + !args->lstio_ses_namep || /* address for output name */ + args->lstio_ses_nmlen <= 0 || args->lstio_ses_nmlen > LST_NAME_SIZE) return -EINVAL; @@ -116,28 +115,28 @@ lst_session_info_ioctl(lstio_session_info_args_t *args) static int lst_debug_ioctl(lstio_debug_args_t *args) { - char *name = NULL; - int client = 1; - int rc; + char *name = NULL; + int client = 1; + int rc; if (args->lstio_dbg_key != console_session.ses_key) return -EACCES; - if (args->lstio_dbg_resultp == NULL) + if (!args->lstio_dbg_resultp) return -EINVAL; - if (args->lstio_dbg_namep != NULL && /* name of batch/group */ + if (args->lstio_dbg_namep && /* name of batch/group */ (args->lstio_dbg_nmlen <= 0 || args->lstio_dbg_nmlen > LST_NAME_SIZE)) return -EINVAL; - if (args->lstio_dbg_namep != NULL) { + if (args->lstio_dbg_namep) { LIBCFS_ALLOC(name, args->lstio_dbg_nmlen + 1); - if (name == NULL) + if (!name) return -ENOMEM; if (copy_from_user(name, args->lstio_dbg_namep, - args->lstio_dbg_nmlen)) { + args->lstio_dbg_nmlen)) { LIBCFS_FREE(name, args->lstio_dbg_nmlen + 1); return -EFAULT; @@ -157,7 +156,7 @@ lst_debug_ioctl(lstio_debug_args_t *args) case LST_OPC_BATCHSRV: client = 0; case LST_OPC_BATCHCLI: - if (name == NULL) + if (!name) goto out; rc = lstcon_batch_debug(args->lstio_dbg_timeout, @@ -165,7 +164,7 @@ lst_debug_ioctl(lstio_debug_args_t *args) break; case LST_OPC_GROUP: - if (name == NULL) + if (!name) goto out; rc = lstcon_group_debug(args->lstio_dbg_timeout, @@ -174,7 +173,7 @@ lst_debug_ioctl(lstio_debug_args_t *args) case LST_OPC_NODES: if (args->lstio_dbg_count <= 0 || - args->lstio_dbg_idsp == NULL) + !args->lstio_dbg_idsp) goto out; rc = lstcon_nodes_debug(args->lstio_dbg_timeout, @@ -188,7 +187,7 @@ lst_debug_ioctl(lstio_debug_args_t *args) } out: - if (name != NULL) + if (name) LIBCFS_FREE(name, args->lstio_dbg_nmlen + 1); return rc; @@ -203,18 +202,17 @@ lst_group_add_ioctl(lstio_group_add_args_t *args) if (args->lstio_grp_key != console_session.ses_key) return -EACCES; - if (args->lstio_grp_namep == NULL || + if (!args->lstio_grp_namep || args->lstio_grp_nmlen <= 0 || args->lstio_grp_nmlen > LST_NAME_SIZE) return -EINVAL; LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1); - if (name == NULL) + if (!name) return -ENOMEM; - if (copy_from_user(name, - args->lstio_grp_namep, - args->lstio_grp_nmlen)) { + if (copy_from_user(name, args->lstio_grp_namep, + args->lstio_grp_nmlen)) { LIBCFS_FREE(name, args->lstio_grp_nmlen); return -EFAULT; } @@ -231,24 +229,23 @@ lst_group_add_ioctl(lstio_group_add_args_t *args) static int lst_group_del_ioctl(lstio_group_del_args_t *args) { - int rc; - char *name; + int rc; + char *name; if 
(args->lstio_grp_key != console_session.ses_key) return -EACCES; - if (args->lstio_grp_namep == NULL || + if (!args->lstio_grp_namep || args->lstio_grp_nmlen <= 0 || args->lstio_grp_nmlen > LST_NAME_SIZE) return -EINVAL; LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1); - if (name == NULL) + if (!name) return -ENOMEM; - if (copy_from_user(name, - args->lstio_grp_namep, - args->lstio_grp_nmlen)) { + if (copy_from_user(name, args->lstio_grp_namep, + args->lstio_grp_nmlen)) { LIBCFS_FREE(name, args->lstio_grp_nmlen + 1); return -EFAULT; } @@ -265,24 +262,23 @@ lst_group_del_ioctl(lstio_group_del_args_t *args) static int lst_group_update_ioctl(lstio_group_update_args_t *args) { - int rc; - char *name; + int rc; + char *name; if (args->lstio_grp_key != console_session.ses_key) return -EACCES; - if (args->lstio_grp_resultp == NULL || - args->lstio_grp_namep == NULL || + if (!args->lstio_grp_resultp || + !args->lstio_grp_namep || args->lstio_grp_nmlen <= 0 || args->lstio_grp_nmlen > LST_NAME_SIZE) return -EINVAL; LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1); - if (name == NULL) + if (!name) return -ENOMEM; - if (copy_from_user(name, - args->lstio_grp_namep, + if (copy_from_user(name, args->lstio_grp_namep, args->lstio_grp_nmlen)) { LIBCFS_FREE(name, args->lstio_grp_nmlen + 1); return -EFAULT; @@ -300,8 +296,8 @@ lst_group_update_ioctl(lstio_group_update_args_t *args) break; case LST_GROUP_RMND: - if (args->lstio_grp_count <= 0 || - args->lstio_grp_idsp == NULL) { + if (args->lstio_grp_count <= 0 || + !args->lstio_grp_idsp) { rc = -EINVAL; break; } @@ -330,21 +326,21 @@ lst_nodes_add_ioctl(lstio_group_nodes_args_t *args) if (args->lstio_grp_key != console_session.ses_key) return -EACCES; - if (args->lstio_grp_idsp == NULL || /* array of ids */ + if (!args->lstio_grp_idsp || /* array of ids */ args->lstio_grp_count <= 0 || - args->lstio_grp_resultp == NULL || - args->lstio_grp_featp == NULL || - args->lstio_grp_namep == NULL || + !args->lstio_grp_resultp || + !args->lstio_grp_featp || + !args->lstio_grp_namep || args->lstio_grp_nmlen <= 0 || args->lstio_grp_nmlen > LST_NAME_SIZE) return -EINVAL; LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1); - if (name == NULL) + if (!name) return -ENOMEM; if (copy_from_user(name, args->lstio_grp_namep, - args->lstio_grp_nmlen)) { + args->lstio_grp_nmlen)) { LIBCFS_FREE(name, args->lstio_grp_nmlen + 1); return -EFAULT; @@ -357,7 +353,7 @@ lst_nodes_add_ioctl(lstio_group_nodes_args_t *args) args->lstio_grp_resultp); LIBCFS_FREE(name, args->lstio_grp_nmlen + 1); - if (rc == 0 && + if (!rc && copy_to_user(args->lstio_grp_featp, &feats, sizeof(feats))) { return -EINVAL; } @@ -371,15 +367,15 @@ lst_group_list_ioctl(lstio_group_list_args_t *args) if (args->lstio_grp_key != console_session.ses_key) return -EACCES; - if (args->lstio_grp_idx < 0 || - args->lstio_grp_namep == NULL || + if (args->lstio_grp_idx < 0 || + !args->lstio_grp_namep || args->lstio_grp_nmlen <= 0 || args->lstio_grp_nmlen > LST_NAME_SIZE) return -EINVAL; return lstcon_group_list(args->lstio_grp_idx, - args->lstio_grp_nmlen, - args->lstio_grp_namep); + args->lstio_grp_nmlen, + args->lstio_grp_namep); } static int @@ -393,24 +389,24 @@ lst_group_info_ioctl(lstio_group_info_args_t *args) if (args->lstio_grp_key != console_session.ses_key) return -EACCES; - if (args->lstio_grp_namep == NULL || + if (!args->lstio_grp_namep || args->lstio_grp_nmlen <= 0 || args->lstio_grp_nmlen > LST_NAME_SIZE) return -EINVAL; - if (args->lstio_grp_entp == NULL && /* output: group entry */ - args->lstio_grp_dentsp == NULL) /* 
output: node entry */ + if (!args->lstio_grp_entp && /* output: group entry */ + !args->lstio_grp_dentsp) /* output: node entry */ return -EINVAL; - if (args->lstio_grp_dentsp != NULL) { /* have node entry */ - if (args->lstio_grp_idxp == NULL || /* node index */ - args->lstio_grp_ndentp == NULL) /* # of node entry */ + if (args->lstio_grp_dentsp) { /* have node entry */ + if (!args->lstio_grp_idxp || /* node index */ + !args->lstio_grp_ndentp) /* # of node entry */ return -EINVAL; if (copy_from_user(&ndent, args->lstio_grp_ndentp, - sizeof(ndent)) || + sizeof(ndent)) || copy_from_user(&index, args->lstio_grp_idxp, - sizeof(index))) + sizeof(index))) return -EFAULT; if (ndent <= 0 || index < 0) @@ -418,12 +414,11 @@ lst_group_info_ioctl(lstio_group_info_args_t *args) } LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1); - if (name == NULL) + if (!name) return -ENOMEM; - if (copy_from_user(name, - args->lstio_grp_namep, - args->lstio_grp_nmlen)) { + if (copy_from_user(name, args->lstio_grp_namep, + args->lstio_grp_nmlen)) { LIBCFS_FREE(name, args->lstio_grp_nmlen + 1); return -EFAULT; } @@ -435,10 +430,10 @@ lst_group_info_ioctl(lstio_group_info_args_t *args) LIBCFS_FREE(name, args->lstio_grp_nmlen + 1); - if (rc != 0) + if (rc) return rc; - if (args->lstio_grp_dentsp != NULL && + if (args->lstio_grp_dentsp && (copy_to_user(args->lstio_grp_idxp, &index, sizeof(index)) || copy_to_user(args->lstio_grp_ndentp, &ndent, sizeof(ndent)))) return -EFAULT; @@ -455,18 +450,17 @@ lst_batch_add_ioctl(lstio_batch_add_args_t *args) if (args->lstio_bat_key != console_session.ses_key) return -EACCES; - if (args->lstio_bat_namep == NULL || + if (!args->lstio_bat_namep || args->lstio_bat_nmlen <= 0 || args->lstio_bat_nmlen > LST_NAME_SIZE) return -EINVAL; LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1); - if (name == NULL) + if (!name) return -ENOMEM; - if (copy_from_user(name, - args->lstio_bat_namep, - args->lstio_bat_nmlen)) { + if (copy_from_user(name, args->lstio_bat_namep, + args->lstio_bat_nmlen)) { LIBCFS_FREE(name, args->lstio_bat_nmlen + 1); return -EFAULT; } @@ -489,18 +483,17 @@ lst_batch_run_ioctl(lstio_batch_run_args_t *args) if (args->lstio_bat_key != console_session.ses_key) return -EACCES; - if (args->lstio_bat_namep == NULL || + if (!args->lstio_bat_namep || args->lstio_bat_nmlen <= 0 || args->lstio_bat_nmlen > LST_NAME_SIZE) return -EINVAL; LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1); - if (name == NULL) + if (!name) return -ENOMEM; - if (copy_from_user(name, - args->lstio_bat_namep, - args->lstio_bat_nmlen)) { + if (copy_from_user(name, args->lstio_bat_namep, + args->lstio_bat_nmlen)) { LIBCFS_FREE(name, args->lstio_bat_nmlen + 1); return -EFAULT; } @@ -524,19 +517,18 @@ lst_batch_stop_ioctl(lstio_batch_stop_args_t *args) if (args->lstio_bat_key != console_session.ses_key) return -EACCES; - if (args->lstio_bat_resultp == NULL || - args->lstio_bat_namep == NULL || + if (!args->lstio_bat_resultp || + !args->lstio_bat_namep || args->lstio_bat_nmlen <= 0 || args->lstio_bat_nmlen > LST_NAME_SIZE) return -EINVAL; LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1); - if (name == NULL) + if (!name) return -ENOMEM; - if (copy_from_user(name, - args->lstio_bat_namep, - args->lstio_bat_nmlen)) { + if (copy_from_user(name, args->lstio_bat_namep, + args->lstio_bat_nmlen)) { LIBCFS_FREE(name, args->lstio_bat_nmlen + 1); return -EFAULT; } @@ -554,14 +546,14 @@ lst_batch_stop_ioctl(lstio_batch_stop_args_t *args) static int lst_batch_query_ioctl(lstio_batch_query_args_t *args) { - char *name; - int rc; + char 
*name; + int rc; if (args->lstio_bat_key != console_session.ses_key) return -EACCES; - if (args->lstio_bat_resultp == NULL || - args->lstio_bat_namep == NULL || + if (!args->lstio_bat_resultp || + !args->lstio_bat_namep || args->lstio_bat_nmlen <= 0 || args->lstio_bat_nmlen > LST_NAME_SIZE) return -EINVAL; @@ -570,12 +562,11 @@ lst_batch_query_ioctl(lstio_batch_query_args_t *args) return -EINVAL; LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1); - if (name == NULL) + if (!name) return -ENOMEM; - if (copy_from_user(name, - args->lstio_bat_namep, - args->lstio_bat_nmlen)) { + if (copy_from_user(name, args->lstio_bat_namep, + args->lstio_bat_nmlen)) { LIBCFS_FREE(name, args->lstio_bat_nmlen + 1); return -EFAULT; } @@ -599,8 +590,8 @@ lst_batch_list_ioctl(lstio_batch_list_args_t *args) if (args->lstio_bat_key != console_session.ses_key) return -EACCES; - if (args->lstio_bat_idx < 0 || - args->lstio_bat_namep == NULL || + if (args->lstio_bat_idx < 0 || + !args->lstio_bat_namep || args->lstio_bat_nmlen <= 0 || args->lstio_bat_nmlen > LST_NAME_SIZE) return -EINVAL; @@ -621,24 +612,24 @@ lst_batch_info_ioctl(lstio_batch_info_args_t *args) if (args->lstio_bat_key != console_session.ses_key) return -EACCES; - if (args->lstio_bat_namep == NULL || /* batch name */ + if (!args->lstio_bat_namep || /* batch name */ args->lstio_bat_nmlen <= 0 || args->lstio_bat_nmlen > LST_NAME_SIZE) return -EINVAL; - if (args->lstio_bat_entp == NULL && /* output: batch entry */ - args->lstio_bat_dentsp == NULL) /* output: node entry */ + if (!args->lstio_bat_entp && /* output: batch entry */ + !args->lstio_bat_dentsp) /* output: node entry */ return -EINVAL; - if (args->lstio_bat_dentsp != NULL) { /* have node entry */ - if (args->lstio_bat_idxp == NULL || /* node index */ - args->lstio_bat_ndentp == NULL) /* # of node entry */ + if (args->lstio_bat_dentsp) { /* have node entry */ + if (!args->lstio_bat_idxp || /* node index */ + !args->lstio_bat_ndentp) /* # of node entry */ return -EINVAL; if (copy_from_user(&index, args->lstio_bat_idxp, - sizeof(index)) || + sizeof(index)) || copy_from_user(&ndent, args->lstio_bat_ndentp, - sizeof(ndent))) + sizeof(ndent))) return -EFAULT; if (ndent <= 0 || index < 0) @@ -646,28 +637,27 @@ lst_batch_info_ioctl(lstio_batch_info_args_t *args) } LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1); - if (name == NULL) + if (!name) return -ENOMEM; - if (copy_from_user(name, - args->lstio_bat_namep, args->lstio_bat_nmlen)) { + if (copy_from_user(name, args->lstio_bat_namep, + args->lstio_bat_nmlen)) { LIBCFS_FREE(name, args->lstio_bat_nmlen + 1); return -EFAULT; } name[args->lstio_bat_nmlen] = 0; - rc = lstcon_batch_info(name, - args->lstio_bat_entp, args->lstio_bat_server, - args->lstio_bat_testidx, &index, &ndent, - args->lstio_bat_dentsp); + rc = lstcon_batch_info(name, args->lstio_bat_entp, + args->lstio_bat_server, args->lstio_bat_testidx, + &index, &ndent, args->lstio_bat_dentsp); LIBCFS_FREE(name, args->lstio_bat_nmlen + 1); - if (rc != 0) + if (rc) return rc; - if (args->lstio_bat_dentsp != NULL && + if (args->lstio_bat_dentsp && (copy_to_user(args->lstio_bat_idxp, &index, sizeof(index)) || copy_to_user(args->lstio_bat_ndentp, &ndent, sizeof(ndent)))) rc = -EFAULT; @@ -679,98 +669,104 @@ static int lst_stat_query_ioctl(lstio_stat_args_t *args) { int rc; - char *name; + char *name = NULL; /* TODO: not finished */ if (args->lstio_sta_key != console_session.ses_key) return -EACCES; - if (args->lstio_sta_resultp == NULL || - (args->lstio_sta_namep == NULL && - args->lstio_sta_idsp == NULL) || - 
args->lstio_sta_nmlen <= 0 || - args->lstio_sta_nmlen > LST_NAME_SIZE) + if (!args->lstio_sta_resultp) return -EINVAL; - if (args->lstio_sta_idsp != NULL && - args->lstio_sta_count <= 0) - return -EINVAL; - - LIBCFS_ALLOC(name, args->lstio_sta_nmlen + 1); - if (name == NULL) - return -ENOMEM; - - if (copy_from_user(name, args->lstio_sta_namep, - args->lstio_sta_nmlen)) { - LIBCFS_FREE(name, args->lstio_sta_nmlen + 1); - return -EFAULT; - } + if (args->lstio_sta_idsp) { + if (args->lstio_sta_count <= 0) + return -EINVAL; - if (args->lstio_sta_idsp == NULL) { - rc = lstcon_group_stat(name, args->lstio_sta_timeout, - args->lstio_sta_resultp); - } else { rc = lstcon_nodes_stat(args->lstio_sta_count, args->lstio_sta_idsp, args->lstio_sta_timeout, args->lstio_sta_resultp); - } + } else if (args->lstio_sta_namep) { + if (args->lstio_sta_nmlen <= 0 || + args->lstio_sta_nmlen > LST_NAME_SIZE) + return -EINVAL; + + LIBCFS_ALLOC(name, args->lstio_sta_nmlen + 1); + if (!name) + return -ENOMEM; - LIBCFS_FREE(name, args->lstio_sta_nmlen + 1); + rc = copy_from_user(name, args->lstio_sta_namep, + args->lstio_sta_nmlen); + if (!rc) + rc = lstcon_group_stat(name, args->lstio_sta_timeout, + args->lstio_sta_resultp); + else + rc = -EFAULT; + } else { + rc = -EINVAL; + } + if (name) + LIBCFS_FREE(name, args->lstio_sta_nmlen + 1); return rc; } static int lst_test_add_ioctl(lstio_test_args_t *args) { - char *batch_name; - char *src_name = NULL; - char *dst_name = NULL; - void *param = NULL; - int ret = 0; - int rc = -ENOMEM; - - if (args->lstio_tes_resultp == NULL || - args->lstio_tes_retp == NULL || - args->lstio_tes_bat_name == NULL || /* no specified batch */ + char *batch_name; + char *src_name = NULL; + char *dst_name = NULL; + void *param = NULL; + int ret = 0; + int rc = -ENOMEM; + + if (!args->lstio_tes_resultp || + !args->lstio_tes_retp || + !args->lstio_tes_bat_name || /* no specified batch */ args->lstio_tes_bat_nmlen <= 0 || args->lstio_tes_bat_nmlen > LST_NAME_SIZE || - args->lstio_tes_sgrp_name == NULL || /* no source group */ + !args->lstio_tes_sgrp_name || /* no source group */ args->lstio_tes_sgrp_nmlen <= 0 || args->lstio_tes_sgrp_nmlen > LST_NAME_SIZE || - args->lstio_tes_dgrp_name == NULL || /* no target group */ + !args->lstio_tes_dgrp_name || /* no target group */ args->lstio_tes_dgrp_nmlen <= 0 || args->lstio_tes_dgrp_nmlen > LST_NAME_SIZE) return -EINVAL; - if (args->lstio_tes_loop == 0 || /* negative is infinite */ + if (!args->lstio_tes_loop || /* negative is infinite */ args->lstio_tes_concur <= 0 || args->lstio_tes_dist <= 0 || args->lstio_tes_span <= 0) return -EINVAL; /* have parameter, check if parameter length is valid */ - if (args->lstio_tes_param != NULL && + if (args->lstio_tes_param && (args->lstio_tes_param_len <= 0 || - args->lstio_tes_param_len > PAGE_CACHE_SIZE - sizeof(lstcon_test_t))) + args->lstio_tes_param_len > + PAGE_SIZE - sizeof(lstcon_test_t))) return -EINVAL; LIBCFS_ALLOC(batch_name, args->lstio_tes_bat_nmlen + 1); - if (batch_name == NULL) + if (!batch_name) return rc; LIBCFS_ALLOC(src_name, args->lstio_tes_sgrp_nmlen + 1); - if (src_name == NULL) + if (!src_name) goto out; LIBCFS_ALLOC(dst_name, args->lstio_tes_dgrp_nmlen + 1); - if (dst_name == NULL) + if (!dst_name) goto out; - if (args->lstio_tes_param != NULL) { + if (args->lstio_tes_param) { LIBCFS_ALLOC(param, args->lstio_tes_param_len); - if (param == NULL) + if (!param) goto out; + if (copy_from_user(param, args->lstio_tes_param, + args->lstio_tes_param_len)) { + rc = -EFAULT; + goto out; + } } rc = 
-EFAULT; @@ -779,54 +775,55 @@ static int lst_test_add_ioctl(lstio_test_args_t *args) copy_from_user(src_name, args->lstio_tes_sgrp_name, args->lstio_tes_sgrp_nmlen) || copy_from_user(dst_name, args->lstio_tes_dgrp_name, - args->lstio_tes_dgrp_nmlen) || - copy_from_user(param, args->lstio_tes_param, - args->lstio_tes_param_len)) + args->lstio_tes_dgrp_nmlen)) goto out; - rc = lstcon_test_add(batch_name, - args->lstio_tes_type, - args->lstio_tes_loop, - args->lstio_tes_concur, - args->lstio_tes_dist, args->lstio_tes_span, - src_name, dst_name, param, - args->lstio_tes_param_len, - &ret, args->lstio_tes_resultp); + rc = lstcon_test_add(batch_name, args->lstio_tes_type, + args->lstio_tes_loop, args->lstio_tes_concur, + args->lstio_tes_dist, args->lstio_tes_span, + src_name, dst_name, param, + args->lstio_tes_param_len, + &ret, args->lstio_tes_resultp); - if (ret != 0) + if (ret) rc = (copy_to_user(args->lstio_tes_retp, &ret, - sizeof(ret))) ? -EFAULT : 0; + sizeof(ret))) ? -EFAULT : 0; out: - if (batch_name != NULL) + if (batch_name) LIBCFS_FREE(batch_name, args->lstio_tes_bat_nmlen + 1); - if (src_name != NULL) + if (src_name) LIBCFS_FREE(src_name, args->lstio_tes_sgrp_nmlen + 1); - if (dst_name != NULL) + if (dst_name) LIBCFS_FREE(dst_name, args->lstio_tes_dgrp_nmlen + 1); - if (param != NULL) + if (param) LIBCFS_FREE(param, args->lstio_tes_param_len); return rc; } int -lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_data *data) +lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_hdr *hdr) { - char *buf; - int opc = data->ioc_u32[0]; - int rc; + char *buf; + struct libcfs_ioctl_data *data; + int opc; + int rc; if (cmd != IOC_LIBCFS_LNETST) return -EINVAL; - if (data->ioc_plen1 > PAGE_CACHE_SIZE) + data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr); + + opc = data->ioc_u32[0]; + + if (data->ioc_plen1 > PAGE_SIZE) return -EINVAL; LIBCFS_ALLOC(buf, data->ioc_plen1); - if (buf == NULL) + if (!buf) return -ENOMEM; /* copy in parameter */ @@ -916,7 +913,7 @@ lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_data *data) } if (copy_to_user(data->ioc_pbuf2, &console_session.ses_trans_stat, - sizeof(lstcon_trans_stat_t))) + sizeof(lstcon_trans_stat_t))) rc = -EFAULT; out: mutex_unlock(&console_session.ses_mutex); diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c index 1066c7043..35a227d0c 100644 --- a/drivers/staging/lustre/lnet/selftest/conrpc.c +++ b/drivers/staging/lustre/lnet/selftest/conrpc.c @@ -54,14 +54,16 @@ lstcon_rpc_done(srpc_client_rpc_t *rpc) { lstcon_rpc_t *crpc = (lstcon_rpc_t *)rpc->crpc_priv; - LASSERT(crpc != NULL && rpc == crpc->crp_rpc); + LASSERT(crpc && rpc == crpc->crp_rpc); LASSERT(crpc->crp_posted && !crpc->crp_finished); spin_lock(&rpc->crpc_lock); - if (crpc->crp_trans == NULL) { - /* Orphan RPC is not in any transaction, - * I'm just a poor body and nobody loves me */ + if (!crpc->crp_trans) { + /* + * Orphan RPC is not in any transaction, + * I'm just a poor body and nobody loves me + */ spin_unlock(&rpc->crpc_lock); /* release it */ @@ -72,11 +74,11 @@ lstcon_rpc_done(srpc_client_rpc_t *rpc) /* not an orphan RPC */ crpc->crp_finished = 1; - if (crpc->crp_stamp == 0) { + if (!crpc->crp_stamp) { /* not aborted */ - LASSERT(crpc->crp_status == 0); + LASSERT(!crpc->crp_status); - crpc->crp_stamp = cfs_time_current(); + crpc->crp_stamp = cfs_time_current(); crpc->crp_status = rpc->crpc_status; } @@ -94,16 +96,16 @@ lstcon_rpc_init(lstcon_node_t *nd, int service, unsigned feats, 
crpc->crp_rpc = sfw_create_rpc(nd->nd_id, service, feats, bulk_npg, bulk_len, lstcon_rpc_done, (void *)crpc); - if (crpc->crp_rpc == NULL) + if (!crpc->crp_rpc) return -ENOMEM; - crpc->crp_trans = NULL; - crpc->crp_node = nd; - crpc->crp_posted = 0; + crpc->crp_trans = NULL; + crpc->crp_node = nd; + crpc->crp_posted = 0; crpc->crp_finished = 0; crpc->crp_unpacked = 0; - crpc->crp_status = 0; - crpc->crp_stamp = 0; + crpc->crp_status = 0; + crpc->crp_stamp = 0; crpc->crp_embedded = embedded; INIT_LIST_HEAD(&crpc->crp_link); @@ -121,22 +123,21 @@ lstcon_rpc_prep(lstcon_node_t *nd, int service, unsigned feats, spin_lock(&console_session.ses_rpc_lock); - if (!list_empty(&console_session.ses_rpc_freelist)) { - crpc = list_entry(console_session.ses_rpc_freelist.next, - lstcon_rpc_t, crp_link); + crpc = list_first_entry_or_null(&console_session.ses_rpc_freelist, + lstcon_rpc_t, crp_link); + if (crpc) list_del_init(&crpc->crp_link); - } spin_unlock(&console_session.ses_rpc_lock); - if (crpc == NULL) { + if (!crpc) { LIBCFS_ALLOC(crpc, sizeof(*crpc)); - if (crpc == NULL) + if (!crpc) return -ENOMEM; } rc = lstcon_rpc_init(nd, service, feats, bulk_npg, bulk_len, 0, crpc); - if (rc == 0) { + if (!rc) { *crpcpp = crpc; return 0; } @@ -155,7 +156,7 @@ lstcon_rpc_put(lstcon_rpc_t *crpc) LASSERT(list_empty(&crpc->crp_link)); for (i = 0; i < bulk->bk_niov; i++) { - if (bulk->bk_iovs[i].kiov_page == NULL) + if (!bulk->bk_iovs[i].kiov_page) continue; __free_page(bulk->bk_iovs[i].kiov_page); @@ -172,7 +173,7 @@ lstcon_rpc_put(lstcon_rpc_t *crpc) spin_lock(&console_session.ses_rpc_lock); list_add(&crpc->crp_link, - &console_session.ses_rpc_freelist); + &console_session.ses_rpc_freelist); spin_unlock(&console_session.ses_rpc_lock); } @@ -186,7 +187,7 @@ lstcon_rpc_post(lstcon_rpc_t *crpc) { lstcon_rpc_trans_t *trans = crpc->crp_trans; - LASSERT(trans != NULL); + LASSERT(trans); atomic_inc(&trans->tas_remaining); crpc->crp_posted = 1; @@ -234,15 +235,17 @@ lstcon_rpc_trans_name(int transop) } int -lstcon_rpc_trans_prep(struct list_head *translist, - int transop, lstcon_rpc_trans_t **transpp) +lstcon_rpc_trans_prep(struct list_head *translist, int transop, + lstcon_rpc_trans_t **transpp) { lstcon_rpc_trans_t *trans; - if (translist != NULL) { + if (translist) { list_for_each_entry(trans, translist, tas_link) { - /* Can't enqueue two private transaction on - * the same object */ + /* + * Can't enqueue two private transaction on + * the same object + */ if ((trans->tas_opc & transop) == LST_TRANS_PRIVATE) return -EPERM; } @@ -250,12 +253,12 @@ lstcon_rpc_trans_prep(struct list_head *translist, /* create a trans group */ LIBCFS_ALLOC(trans, sizeof(*trans)); - if (trans == NULL) + if (!trans) return -ENOMEM; trans->tas_opc = transop; - if (translist == NULL) + if (!translist) INIT_LIST_HEAD(&trans->tas_olink); else list_add_tail(&trans->tas_olink, translist); @@ -285,8 +288,8 @@ void lstcon_rpc_trans_abort(lstcon_rpc_trans_t *trans, int error) { srpc_client_rpc_t *rpc; - lstcon_rpc_t *crpc; - lstcon_node_t *nd; + lstcon_rpc_t *crpc; + lstcon_node_t *nd; list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) { rpc = crpc->crp_rpc; @@ -294,8 +297,8 @@ lstcon_rpc_trans_abort(lstcon_rpc_trans_t *trans, int error) spin_lock(&rpc->crpc_lock); if (!crpc->crp_posted || /* not posted */ - crpc->crp_stamp != 0) { /* rpc done or aborted already */ - if (crpc->crp_stamp == 0) { + crpc->crp_stamp) { /* rpc done or aborted already */ + if (!crpc->crp_stamp) { crpc->crp_stamp = cfs_time_current(); crpc->crp_status = -EINTR; } 
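/*
 * Editor's illustrative sketch, not part of this patch: the "stamp once"
 * protocol that lstcon_rpc_trans_abort() above relies on. crp_stamp
 * doubles as a done-or-aborted flag: whichever of the completion callback
 * (lstcon_rpc_done) or the abort path takes the per-RPC lock first records
 * the stamp and the status, and the loser backs off. A minimal user-space
 * analogue; the sketch_* names and the use of pthreads are assumptions:
 */
#include <pthread.h>

struct sketch_rpc {
        pthread_mutex_t lock;
        unsigned long stamp;    /* 0 until completed or aborted */
        int status;             /* only meaningful once stamp is set */
};

/* abort path: claim the RPC only if the completion path has not won yet */
static int sketch_abort(struct sketch_rpc *rpc, unsigned long now, int error)
{
        int claimed = 0;

        pthread_mutex_lock(&rpc->lock);
        if (!rpc->stamp) {              /* not done yet: mark it aborted */
                rpc->stamp = now;
                rpc->status = error;    /* e.g. -ETIMEDOUT or -EINTR */
                claimed = 1;
        }
        pthread_mutex_unlock(&rpc->lock);
        return claimed;
}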
@@ -303,14 +306,14 @@ lstcon_rpc_trans_abort(lstcon_rpc_trans_t *trans, int error) continue; } - crpc->crp_stamp = cfs_time_current(); + crpc->crp_stamp = cfs_time_current(); crpc->crp_status = error; spin_unlock(&rpc->crpc_lock); sfw_abort_rpc(rpc); - if (error != ETIMEDOUT) + if (error != -ETIMEDOUT) continue; nd = crpc->crp_node; @@ -329,7 +332,7 @@ lstcon_rpc_trans_check(lstcon_rpc_trans_t *trans) !list_empty(&trans->tas_olink)) /* Not an end session RPC */ return 1; - return (atomic_read(&trans->tas_remaining) == 0) ? 1 : 0; + return !atomic_read(&trans->tas_remaining) ? 1 : 0; } int @@ -366,7 +369,7 @@ lstcon_rpc_trans_postwait(lstcon_rpc_trans_t *trans, int timeout) if (console_session.ses_shutdown) rc = -ESHUTDOWN; - if (rc != 0 || atomic_read(&trans->tas_remaining) != 0) { + if (rc || atomic_read(&trans->tas_remaining)) { /* treat short timeout as canceled */ if (rc == -ETIMEDOUT && timeout < LST_TRANS_MIN_TIMEOUT * 2) rc = -EINTR; @@ -385,14 +388,14 @@ lstcon_rpc_trans_postwait(lstcon_rpc_trans_t *trans, int timeout) static int lstcon_rpc_get_reply(lstcon_rpc_t *crpc, srpc_msg_t **msgpp) { - lstcon_node_t *nd = crpc->crp_node; + lstcon_node_t *nd = crpc->crp_node; srpc_client_rpc_t *rpc = crpc->crp_rpc; srpc_generic_reply_t *rep; - LASSERT(nd != NULL && rpc != NULL); - LASSERT(crpc->crp_stamp != 0); + LASSERT(nd && rpc); + LASSERT(crpc->crp_stamp); - if (crpc->crp_status != 0) { + if (crpc->crp_status) { *msgpp = NULL; return crpc->crp_status; } @@ -422,23 +425,23 @@ lstcon_rpc_get_reply(lstcon_rpc_t *crpc, srpc_msg_t **msgpp) void lstcon_rpc_trans_stat(lstcon_rpc_trans_t *trans, lstcon_trans_stat_t *stat) { - lstcon_rpc_t *crpc; + lstcon_rpc_t *crpc; srpc_msg_t *rep; int error; - LASSERT(stat != NULL); + LASSERT(stat); memset(stat, 0, sizeof(*stat)); list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) { lstcon_rpc_stat_total(stat, 1); - LASSERT(crpc->crp_stamp != 0); + LASSERT(crpc->crp_stamp); error = lstcon_rpc_get_reply(crpc, &rep); - if (error != 0) { + if (error) { lstcon_rpc_stat_failure(stat, 1); - if (stat->trs_rpc_errno == 0) + if (!stat->trs_rpc_errno) stat->trs_rpc_errno = -error; continue; @@ -449,7 +452,7 @@ lstcon_rpc_trans_stat(lstcon_rpc_trans_t *trans, lstcon_trans_stat_t *stat) lstcon_rpc_stat_reply(trans, rep, crpc->crp_node, stat); } - if (trans->tas_opc == LST_TRANS_SESNEW && stat->trs_fwk_errno == 0) { + if (trans->tas_opc == LST_TRANS_SESNEW && !stat->trs_fwk_errno) { stat->trs_fwk_errno = lstcon_session_feats_check(trans->tas_features); } @@ -460,17 +463,15 @@ lstcon_rpc_trans_stat(lstcon_rpc_trans_t *trans, lstcon_trans_stat_t *stat) lstcon_rpc_stat_failure(stat, 0), lstcon_rpc_stat_total(stat, 0), stat->trs_rpc_errno, stat->trs_fwk_errno); - - return; } int lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans, - struct list_head *head_up, + struct list_head __user *head_up, lstcon_rpc_readent_func_t readent) { struct list_head tmp; - struct list_head *next; + struct list_head __user *next; lstcon_rpc_ent_t *ent; srpc_generic_reply_t *rep; lstcon_rpc_t *crpc; @@ -480,13 +481,13 @@ lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans, struct timeval tv; int error; - LASSERT(head_up != NULL); + LASSERT(head_up); next = head_up; list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) { if (copy_from_user(&tmp, next, - sizeof(struct list_head))) + sizeof(struct list_head))) return -EFAULT; if (tmp.next == head_up) @@ -496,7 +497,7 @@ lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans, ent = list_entry(next, lstcon_rpc_ent_t, rpe_link); - 
LASSERT(crpc->crp_stamp != 0); + LASSERT(crpc->crp_stamp); error = lstcon_rpc_get_reply(crpc, &msg); @@ -506,33 +507,32 @@ lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans, (unsigned long)console_session.ses_id.ses_stamp); jiffies_to_timeval(dur, &tv); - if (copy_to_user(&ent->rpe_peer, - &nd->nd_id, sizeof(lnet_process_id_t)) || + if (copy_to_user(&ent->rpe_peer, &nd->nd_id, + sizeof(lnet_process_id_t)) || copy_to_user(&ent->rpe_stamp, &tv, sizeof(tv)) || - copy_to_user(&ent->rpe_state, - &nd->nd_state, sizeof(nd->nd_state)) || + copy_to_user(&ent->rpe_state, &nd->nd_state, + sizeof(nd->nd_state)) || copy_to_user(&ent->rpe_rpc_errno, &error, - sizeof(error))) + sizeof(error))) return -EFAULT; - if (error != 0) + if (error) continue; /* RPC is done */ rep = (srpc_generic_reply_t *)&msg->msg_body.reply; - if (copy_to_user(&ent->rpe_sid, - &rep->sid, sizeof(lst_sid_t)) || - copy_to_user(&ent->rpe_fwk_errno, - &rep->status, sizeof(rep->status))) + if (copy_to_user(&ent->rpe_sid, &rep->sid, sizeof(lst_sid_t)) || + copy_to_user(&ent->rpe_fwk_errno, &rep->status, + sizeof(rep->status))) return -EFAULT; - if (readent == NULL) + if (!readent) continue; error = readent(trans->tas_opc, msg, ent); - if (error != 0) + if (error) return error; } @@ -547,8 +547,7 @@ lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans) lstcon_rpc_t *tmp; int count = 0; - list_for_each_entry_safe(crpc, tmp, &trans->tas_rpcs_list, - crp_link) { + list_for_each_entry_safe(crpc, tmp, &trans->tas_rpcs_list, crp_link) { rpc = crpc->crp_rpc; spin_lock(&rpc->crpc_lock); @@ -563,14 +562,15 @@ lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans) continue; } - /* rpcs can be still not callbacked (even LNetMDUnlink is called) + /* + * rpcs can be still not callbacked (even LNetMDUnlink is called) * because huge timeout for inaccessible network, don't make * user wait for them, just abandon them, they will be recycled - * in callback */ + * in callback + */ + LASSERT(crpc->crp_status); - LASSERT(crpc->crp_status != 0); - - crpc->crp_node = NULL; + crpc->crp_node = NULL; crpc->crp_trans = NULL; list_del_init(&crpc->crp_link); count++; @@ -580,7 +580,7 @@ lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans) atomic_dec(&trans->tas_remaining); } - LASSERT(atomic_read(&trans->tas_remaining) == 0); + LASSERT(!atomic_read(&trans->tas_remaining)); list_del(&trans->tas_link); if (!list_empty(&trans->tas_olink)) @@ -590,8 +590,6 @@ lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans) lstcon_rpc_trans_name(trans->tas_opc), count); LIBCFS_FREE(trans, sizeof(*trans)); - - return; } int @@ -606,12 +604,12 @@ lstcon_sesrpc_prep(lstcon_node_t *nd, int transop, case LST_TRANS_SESNEW: rc = lstcon_rpc_prep(nd, SRPC_SERVICE_MAKE_SESSION, feats, 0, 0, crpc); - if (rc != 0) + if (rc) return rc; msrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.mksn_reqst; - msrq->mksn_sid = console_session.ses_id; - msrq->mksn_force = console_session.ses_force; + msrq->mksn_sid = console_session.ses_id; + msrq->mksn_force = console_session.ses_force; strlcpy(msrq->mksn_name, console_session.ses_name, sizeof(msrq->mksn_name)); break; @@ -619,7 +617,7 @@ lstcon_sesrpc_prep(lstcon_node_t *nd, int transop, case LST_TRANS_SESEND: rc = lstcon_rpc_prep(nd, SRPC_SERVICE_REMOVE_SESSION, feats, 0, 0, crpc); - if (rc != 0) + if (rc) return rc; rsrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.rmsn_reqst; @@ -640,12 +638,12 @@ lstcon_dbgrpc_prep(lstcon_node_t *nd, unsigned feats, lstcon_rpc_t **crpc) int rc; rc = lstcon_rpc_prep(nd, SRPC_SERVICE_DEBUG, feats, 0, 0, crpc); - if (rc != 
0) + if (rc) return rc; drq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst; - drq->dbg_sid = console_session.ses_id; + drq->dbg_sid = console_session.ses_id; drq->dbg_flags = 0; return rc; @@ -655,28 +653,28 @@ int lstcon_batrpc_prep(lstcon_node_t *nd, int transop, unsigned feats, lstcon_tsb_hdr_t *tsb, lstcon_rpc_t **crpc) { - lstcon_batch_t *batch; + lstcon_batch_t *batch; srpc_batch_reqst_t *brq; - int rc; + int rc; rc = lstcon_rpc_prep(nd, SRPC_SERVICE_BATCH, feats, 0, 0, crpc); - if (rc != 0) + if (rc) return rc; brq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.bat_reqst; - brq->bar_sid = console_session.ses_id; - brq->bar_bid = tsb->tsb_id; + brq->bar_sid = console_session.ses_id; + brq->bar_bid = tsb->tsb_id; brq->bar_testidx = tsb->tsb_index; - brq->bar_opc = transop == LST_TRANS_TSBRUN ? SRPC_BATCH_OPC_RUN : - (transop == LST_TRANS_TSBSTOP ? SRPC_BATCH_OPC_STOP : - SRPC_BATCH_OPC_QUERY); + brq->bar_opc = transop == LST_TRANS_TSBRUN ? SRPC_BATCH_OPC_RUN : + (transop == LST_TRANS_TSBSTOP ? SRPC_BATCH_OPC_STOP : + SRPC_BATCH_OPC_QUERY); if (transop != LST_TRANS_TSBRUN && transop != LST_TRANS_TSBSTOP) return 0; - LASSERT(tsb->tsb_index == 0); + LASSERT(!tsb->tsb_index); batch = (lstcon_batch_t *)tsb; brq->bar_arg = batch->bat_arg; @@ -688,15 +686,15 @@ int lstcon_statrpc_prep(lstcon_node_t *nd, unsigned feats, lstcon_rpc_t **crpc) { srpc_stat_reqst_t *srq; - int rc; + int rc; rc = lstcon_rpc_prep(nd, SRPC_SERVICE_QUERY_STAT, feats, 0, 0, crpc); - if (rc != 0) + if (rc) return rc; srq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.stat_reqst; - srq->str_sid = console_session.ses_id; + srq->str_sid = console_session.ses_id; srq->str_type = 0; /* XXX remove it */ return 0; @@ -736,7 +734,7 @@ lstcon_dstnodes_prep(lstcon_group_t *grp, int idx, return -EINVAL; start = ((idx / dist) * span) % grp->grp_nnode; - end = ((idx / dist) * span + span - 1) % grp->grp_nnode; + end = ((idx / dist) * span + span - 1) % grp->grp_nnode; list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) { nd = ndl->ndl_node; @@ -776,7 +774,7 @@ lstcon_pingrpc_prep(lst_test_ping_param_t *param, srpc_test_reqst_t *req) { test_ping_req_t *prq = &req->tsr_u.ping; - prq->png_size = param->png_size; + prq->png_size = param->png_size; prq->png_flags = param->png_flags; /* TODO dest */ return 0; @@ -787,9 +785,9 @@ lstcon_bulkrpc_v0_prep(lst_test_bulk_param_t *param, srpc_test_reqst_t *req) { test_bulk_req_t *brq = &req->tsr_u.bulk_v0; - brq->blk_opc = param->blk_opc; - brq->blk_npg = (param->blk_size + PAGE_CACHE_SIZE - 1) / - PAGE_CACHE_SIZE; + brq->blk_opc = param->blk_opc; + brq->blk_npg = (param->blk_size + PAGE_SIZE - 1) / + PAGE_SIZE; brq->blk_flags = param->blk_flags; return 0; @@ -800,9 +798,9 @@ lstcon_bulkrpc_v1_prep(lst_test_bulk_param_t *param, srpc_test_reqst_t *req) { test_bulk_req_v1_t *brq = &req->tsr_u.bulk_v1; - brq->blk_opc = param->blk_opc; - brq->blk_flags = param->blk_flags; - brq->blk_len = param->blk_size; + brq->blk_opc = param->blk_opc; + brq->blk_flags = param->blk_flags; + brq->blk_len = param->blk_size; brq->blk_offset = 0; /* reserved */ return 0; @@ -812,27 +810,27 @@ int lstcon_testrpc_prep(lstcon_node_t *nd, int transop, unsigned feats, lstcon_test_t *test, lstcon_rpc_t **crpc) { - lstcon_group_t *sgrp = test->tes_src_grp; - lstcon_group_t *dgrp = test->tes_dst_grp; + lstcon_group_t *sgrp = test->tes_src_grp; + lstcon_group_t *dgrp = test->tes_dst_grp; srpc_test_reqst_t *trq; - srpc_bulk_t *bulk; - int i; - int npg = 0; - int nob = 0; - int rc = 0; + srpc_bulk_t *bulk; + int i; + int npg 
= 0; + int nob = 0; + int rc = 0; if (transop == LST_TRANS_TSBCLIADD) { npg = sfw_id_pages(test->tes_span); - nob = (feats & LST_FEAT_BULK_LEN) == 0 ? - npg * PAGE_CACHE_SIZE : + nob = !(feats & LST_FEAT_BULK_LEN) ? + npg * PAGE_SIZE : sizeof(lnet_process_id_packed_t) * test->tes_span; } rc = lstcon_rpc_prep(nd, SRPC_SERVICE_TEST, feats, npg, nob, crpc); - if (rc != 0) + if (rc) return rc; - trq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.tes_reqst; + trq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.tes_reqst; if (transop == LST_TRANS_TSBSRVADD) { int ndist = (sgrp->grp_nnode + test->tes_dist - 1) / @@ -842,27 +840,27 @@ lstcon_testrpc_prep(lstcon_node_t *nd, int transop, unsigned feats, int nmax = (ndist + nspan - 1) / nspan; trq->tsr_ndest = 0; - trq->tsr_loop = nmax * test->tes_dist * test->tes_concur; + trq->tsr_loop = nmax * test->tes_dist * test->tes_concur; } else { bulk = &(*crpc)->crp_rpc->crpc_bulk; for (i = 0; i < npg; i++) { - int len; + int len; LASSERT(nob > 0); - len = (feats & LST_FEAT_BULK_LEN) == 0 ? - PAGE_CACHE_SIZE : - min_t(int, nob, PAGE_CACHE_SIZE); + len = !(feats & LST_FEAT_BULK_LEN) ? + PAGE_SIZE : + min_t(int, nob, PAGE_SIZE); nob -= len; bulk->bk_iovs[i].kiov_offset = 0; - bulk->bk_iovs[i].kiov_len = len; - bulk->bk_iovs[i].kiov_page = + bulk->bk_iovs[i].kiov_len = len; + bulk->bk_iovs[i].kiov_page = alloc_page(GFP_KERNEL); - if (bulk->bk_iovs[i].kiov_page == NULL) { + if (!bulk->bk_iovs[i].kiov_page) { lstcon_rpc_put(*crpc); return -ENOMEM; } @@ -877,19 +875,19 @@ lstcon_testrpc_prep(lstcon_node_t *nd, int transop, unsigned feats, test->tes_dist, test->tes_span, npg, &bulk->bk_iovs[0]); - if (rc != 0) { + if (rc) { lstcon_rpc_put(*crpc); return rc; } trq->tsr_ndest = test->tes_span; - trq->tsr_loop = test->tes_loop; + trq->tsr_loop = test->tes_loop; } - trq->tsr_sid = console_session.ses_id; - trq->tsr_bid = test->tes_hdr.tsb_id; - trq->tsr_concur = test->tes_concur; - trq->tsr_is_client = (transop == LST_TRANS_TSBCLIADD) ? 1 : 0; + trq->tsr_sid = console_session.ses_id; + trq->tsr_bid = test->tes_hdr.tsb_id; + trq->tsr_concur = test->tes_concur; + trq->tsr_is_client = (transop == LST_TRANS_TSBCLIADD) ? 
1 : 0; trq->tsr_stop_onerr = !!test->tes_stop_onerr; switch (test->tes_type) { @@ -901,7 +899,7 @@ lstcon_testrpc_prep(lstcon_node_t *nd, int transop, unsigned feats, case LST_TEST_BULK: trq->tsr_service = SRPC_SERVICE_BRW; - if ((feats & LST_FEAT_BULK_LEN) == 0) { + if (!(feats & LST_FEAT_BULK_LEN)) { rc = lstcon_bulkrpc_v0_prep((lst_test_bulk_param_t *) &test->tes_param[0], trq); } else { @@ -923,10 +921,10 @@ lstcon_sesnew_stat_reply(lstcon_rpc_trans_t *trans, lstcon_node_t *nd, srpc_msg_t *reply) { srpc_mksn_reply_t *mksn_rep = &reply->msg_body.mksn_reply; - int status = mksn_rep->mksn_status; + int status = mksn_rep->mksn_status; - if (status == 0 && - (reply->msg_ses_feats & ~LST_FEATS_MASK) != 0) { + if (!status && + (reply->msg_ses_feats & ~LST_FEATS_MASK)) { mksn_rep->mksn_status = EPROTO; status = EPROTO; } @@ -937,22 +935,27 @@ lstcon_sesnew_stat_reply(lstcon_rpc_trans_t *trans, reply->msg_ses_feats); } - if (status != 0) + if (status) return status; if (!trans->tas_feats_updated) { - trans->tas_feats_updated = 1; - trans->tas_features = reply->msg_ses_feats; + spin_lock(&console_session.ses_rpc_lock); + if (!trans->tas_feats_updated) { /* recheck with lock */ + trans->tas_feats_updated = 1; + trans->tas_features = reply->msg_ses_feats; + } + spin_unlock(&console_session.ses_rpc_lock); } if (reply->msg_ses_feats != trans->tas_features) { CNETERR("Framework features %x from %s is different with features on this transaction: %x\n", - reply->msg_ses_feats, libcfs_nid2str(nd->nd_id.nid), - trans->tas_features); - status = mksn_rep->mksn_status = EPROTO; + reply->msg_ses_feats, libcfs_nid2str(nd->nd_id.nid), + trans->tas_features); + mksn_rep->mksn_status = EPROTO; + status = EPROTO; } - if (status == 0) { + if (!status) { /* session timeout on remote node */ nd->nd_timeout = mksn_rep->mksn_timeout; } @@ -964,17 +967,17 @@ void lstcon_rpc_stat_reply(lstcon_rpc_trans_t *trans, srpc_msg_t *msg, lstcon_node_t *nd, lstcon_trans_stat_t *stat) { - srpc_rmsn_reply_t *rmsn_rep; + srpc_rmsn_reply_t *rmsn_rep; srpc_debug_reply_t *dbg_rep; srpc_batch_reply_t *bat_rep; - srpc_test_reply_t *test_rep; - srpc_stat_reply_t *stat_rep; - int rc = 0; + srpc_test_reply_t *test_rep; + srpc_stat_reply_t *stat_rep; + int rc = 0; switch (trans->tas_opc) { case LST_TRANS_SESNEW: rc = lstcon_sesnew_stat_reply(trans, nd, msg); - if (rc == 0) { + if (!rc) { lstcon_sesop_stat_success(stat, 1); return; } @@ -985,7 +988,7 @@ lstcon_rpc_stat_reply(lstcon_rpc_trans_t *trans, srpc_msg_t *msg, case LST_TRANS_SESEND: rmsn_rep = &msg->msg_body.rmsn_reply; /* ESRCH is not an error for end session */ - if (rmsn_rep->rmsn_status == 0 || + if (!rmsn_rep->rmsn_status || rmsn_rep->rmsn_status == ESRCH) { lstcon_sesop_stat_success(stat, 1); return; @@ -1014,7 +1017,7 @@ lstcon_rpc_stat_reply(lstcon_rpc_trans_t *trans, srpc_msg_t *msg, case LST_TRANS_TSBSTOP: bat_rep = &msg->msg_body.bat_reply; - if (bat_rep->bar_status == 0) { + if (!bat_rep->bar_status) { lstcon_tsbop_stat_success(stat, 1); return; } @@ -1033,12 +1036,12 @@ lstcon_rpc_stat_reply(lstcon_rpc_trans_t *trans, srpc_msg_t *msg, case LST_TRANS_TSBSRVQRY: bat_rep = &msg->msg_body.bat_reply; - if (bat_rep->bar_active != 0) + if (bat_rep->bar_active) lstcon_tsbqry_stat_run(stat, 1); else lstcon_tsbqry_stat_idle(stat, 1); - if (bat_rep->bar_status == 0) + if (!bat_rep->bar_status) return; lstcon_tsbqry_stat_failure(stat, 1); @@ -1049,7 +1052,7 @@ lstcon_rpc_stat_reply(lstcon_rpc_trans_t *trans, srpc_msg_t *msg, case LST_TRANS_TSBSRVADD: test_rep = 
&msg->msg_body.tes_reply; - if (test_rep->tsr_status == 0) { + if (!test_rep->tsr_status) { lstcon_tsbop_stat_success(stat, 1); return; } @@ -1061,7 +1064,7 @@ lstcon_rpc_stat_reply(lstcon_rpc_trans_t *trans, srpc_msg_t *msg, case LST_TRANS_STATQRY: stat_rep = &msg->msg_body.stat_reply; - if (stat_rep->str_status == 0) { + if (!stat_rep->str_status) { lstcon_statqry_stat_success(stat, 1); return; } @@ -1074,10 +1077,8 @@ lstcon_rpc_stat_reply(lstcon_rpc_trans_t *trans, srpc_msg_t *msg, LBUG(); } - if (stat->trs_fwk_errno == 0) + if (!stat->trs_fwk_errno) stat->trs_fwk_errno = rc; - - return; } int @@ -1096,22 +1097,22 @@ lstcon_rpc_trans_ndlist(struct list_head *ndlist, /* Creating session RPG for list of nodes */ rc = lstcon_rpc_trans_prep(translist, transop, &trans); - if (rc != 0) { + if (rc) { CERROR("Can't create transaction %d: %d\n", transop, rc); return rc; } feats = trans->tas_features; list_for_each_entry(ndl, ndlist, ndl_link) { - rc = condition == NULL ? 1 : + rc = !condition ? 1 : condition(transop, ndl->ndl_node, arg); - if (rc == 0) + if (!rc) continue; if (rc < 0) { CDEBUG(D_NET, "Condition error while creating RPC for transaction %d: %d\n", - transop, rc); + transop, rc); break; } @@ -1146,7 +1147,7 @@ lstcon_rpc_trans_ndlist(struct list_head *ndlist, break; } - if (rc != 0) { + if (rc) { CERROR("Failed to create RPC for transaction %s: %d\n", lstcon_rpc_trans_name(transop), rc); break; @@ -1155,7 +1156,7 @@ lstcon_rpc_trans_ndlist(struct list_head *ndlist, lstcon_rpc_trans_addreq(trans, rpc); } - if (rc == 0) { + if (!rc) { *transpp = trans; return 0; } @@ -1168,7 +1169,7 @@ lstcon_rpc_trans_ndlist(struct list_head *ndlist, static void lstcon_rpc_pinger(void *arg) { - stt_timer_t *ptimer = (stt_timer_t *)arg; + struct stt_timer *ptimer = (struct stt_timer *)arg; lstcon_rpc_trans_t *trans; lstcon_rpc_t *crpc; srpc_msg_t *rep; @@ -1196,7 +1197,7 @@ lstcon_rpc_pinger(void *arg) trans = console_session.ses_ping; - LASSERT(trans != NULL); + LASSERT(trans); list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link) { nd = ndl->ndl_node; @@ -1208,7 +1209,7 @@ lstcon_rpc_pinger(void *arg) rc = lstcon_sesrpc_prep(nd, LST_TRANS_SESEND, trans->tas_features, &crpc); - if (rc != 0) { + if (rc) { CERROR("Out of memory\n"); break; } @@ -1221,7 +1222,7 @@ lstcon_rpc_pinger(void *arg) crpc = &nd->nd_ping; - if (crpc->crp_rpc != NULL) { + if (crpc->crp_rpc) { LASSERT(crpc->crp_trans == trans); LASSERT(!list_empty(&crpc->crp_link)); @@ -1247,20 +1248,20 @@ lstcon_rpc_pinger(void *arg) if (nd->nd_state != LST_NODE_ACTIVE) continue; - intv = (jiffies - nd->nd_stamp) / HZ; + intv = (jiffies - nd->nd_stamp) / msecs_to_jiffies(MSEC_PER_SEC); if (intv < nd->nd_timeout / 2) continue; rc = lstcon_rpc_init(nd, SRPC_SERVICE_DEBUG, trans->tas_features, 0, 0, 1, crpc); - if (rc != 0) { + if (rc) { CERROR("Out of memory\n"); break; } drq = &crpc->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst; - drq->dbg_sid = console_session.ses_id; + drq->dbg_sid = console_session.ses_id; drq->dbg_flags = 0; lstcon_rpc_trans_addreq(trans, crpc); @@ -1285,15 +1286,15 @@ lstcon_rpc_pinger(void *arg) int lstcon_rpc_pinger_start(void) { - stt_timer_t *ptimer; + struct stt_timer *ptimer; int rc; LASSERT(list_empty(&console_session.ses_rpc_freelist)); - LASSERT(atomic_read(&console_session.ses_rpc_counter) == 0); + LASSERT(!atomic_read(&console_session.ses_rpc_counter)); rc = lstcon_rpc_trans_prep(NULL, LST_TRANS_SESPING, &console_session.ses_ping); - if (rc != 0) { + if (rc) { CERROR("Failed to create console pinger\n"); 
return rc; } @@ -1327,6 +1328,7 @@ lstcon_rpc_cleanup_wait(void) { lstcon_rpc_trans_t *trans; lstcon_rpc_t *crpc; + lstcon_rpc_t *temp; struct list_head *pacer; struct list_head zlist; @@ -1337,7 +1339,7 @@ lstcon_rpc_cleanup_wait(void) while (!list_empty(&console_session.ses_trans_list)) { list_for_each(pacer, &console_session.ses_trans_list) { trans = list_entry(pacer, lstcon_rpc_trans_t, - tas_link); + tas_link); CDEBUG(D_NET, "Session closed, wakeup transaction %s\n", lstcon_rpc_trans_name(trans->tas_opc)); @@ -1356,7 +1358,7 @@ lstcon_rpc_cleanup_wait(void) spin_lock(&console_session.ses_rpc_lock); - lst_wait_until((atomic_read(&console_session.ses_rpc_counter) == 0), + lst_wait_until(!atomic_read(&console_session.ses_rpc_counter), console_session.ses_rpc_lock, "Network is not accessible or target is down, waiting for %d console RPCs to be recycled\n", atomic_read(&console_session.ses_rpc_counter)); @@ -1366,9 +1368,7 @@ lstcon_rpc_cleanup_wait(void) spin_unlock(&console_session.ses_rpc_lock); - while (!list_empty(&zlist)) { - crpc = list_entry(zlist.next, lstcon_rpc_t, crp_link); - + list_for_each_entry_safe(crpc, temp, &zlist, crp_link) { list_del(&crpc->crp_link); LIBCFS_FREE(crpc, sizeof(lstcon_rpc_t)); } @@ -1394,5 +1394,5 @@ void lstcon_rpc_module_fini(void) { LASSERT(list_empty(&console_session.ses_rpc_freelist)); - LASSERT(atomic_read(&console_session.ses_rpc_counter) == 0); + LASSERT(!atomic_read(&console_session.ses_rpc_counter)); } diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.h b/drivers/staging/lustre/lnet/selftest/conrpc.h index 95c832ff7..3e7839dad 100644 --- a/drivers/staging/lustre/lnet/selftest/conrpc.h +++ b/drivers/staging/lustre/lnet/selftest/conrpc.h @@ -51,12 +51,12 @@ #include "selftest.h" /* Console rpc and rpc transaction */ -#define LST_TRANS_TIMEOUT 30 -#define LST_TRANS_MIN_TIMEOUT 3 +#define LST_TRANS_TIMEOUT 30 +#define LST_TRANS_MIN_TIMEOUT 3 #define LST_VALIDATE_TIMEOUT(t) min(max(t, LST_TRANS_MIN_TIMEOUT), LST_TRANS_TIMEOUT) -#define LST_PING_INTERVAL 8 +#define LST_PING_INTERVAL 8 struct lstcon_rpc_trans; struct lstcon_tsb_hdr; @@ -64,49 +64,50 @@ struct lstcon_test; struct lstcon_node; typedef struct lstcon_rpc { - struct list_head crp_link; /* chain on rpc transaction */ - srpc_client_rpc_t *crp_rpc; /* client rpc */ - struct lstcon_node *crp_node; /* destination node */ - struct lstcon_rpc_trans *crp_trans; /* conrpc transaction */ - - unsigned int crp_posted:1; /* rpc is posted */ - unsigned int crp_finished:1; /* rpc is finished */ - unsigned int crp_unpacked:1; /* reply is unpacked */ + struct list_head crp_link; /* chain on rpc transaction */ + srpc_client_rpc_t *crp_rpc; /* client rpc */ + struct lstcon_node *crp_node; /* destination node */ + struct lstcon_rpc_trans *crp_trans; /* conrpc transaction */ + + unsigned int crp_posted:1; /* rpc is posted */ + unsigned int crp_finished:1; /* rpc is finished */ + unsigned int crp_unpacked:1; /* reply is unpacked */ /** RPC is embedded in another structure and can't be freed separately */ - unsigned int crp_embedded:1; - int crp_status; /* console rpc errors */ - unsigned long crp_stamp; /* replied time stamp */ + unsigned int crp_embedded:1; + int crp_status; /* console rpc errors */ + unsigned long crp_stamp; /* replied time stamp */ } lstcon_rpc_t; typedef struct lstcon_rpc_trans { - struct list_head tas_olink; /* link chain on owner list */ - struct list_head tas_link; /* link chain on global list */ - int tas_opc; /* operation code of transaction */ - unsigned tas_feats_updated; /* features 
mask is uptodate */ - unsigned tas_features; /* test features mask */ - wait_queue_head_t tas_waitq; /* wait queue head */ - atomic_t tas_remaining; /* # of un-scheduled rpcs */ + struct list_head tas_olink; /* link chain on owner list */ + struct list_head tas_link; /* link chain on global list */ + int tas_opc; /* operation code of transaction */ + unsigned tas_feats_updated; /* features mask is uptodate */ + unsigned tas_features; /* test features mask */ + wait_queue_head_t tas_waitq; /* wait queue head */ + atomic_t tas_remaining; /* # of un-scheduled rpcs */ struct list_head tas_rpcs_list; /* queued requests */ } lstcon_rpc_trans_t; -#define LST_TRANS_PRIVATE 0x1000 +#define LST_TRANS_PRIVATE 0x1000 #define LST_TRANS_SESNEW (LST_TRANS_PRIVATE | 0x01) #define LST_TRANS_SESEND (LST_TRANS_PRIVATE | 0x02) #define LST_TRANS_SESQRY 0x03 -#define LST_TRANS_SESPING 0x04 +#define LST_TRANS_SESPING 0x04 -#define LST_TRANS_TSBCLIADD (LST_TRANS_PRIVATE | 0x11) -#define LST_TRANS_TSBSRVADD (LST_TRANS_PRIVATE | 0x12) +#define LST_TRANS_TSBCLIADD (LST_TRANS_PRIVATE | 0x11) +#define LST_TRANS_TSBSRVADD (LST_TRANS_PRIVATE | 0x12) #define LST_TRANS_TSBRUN (LST_TRANS_PRIVATE | 0x13) -#define LST_TRANS_TSBSTOP (LST_TRANS_PRIVATE | 0x14) -#define LST_TRANS_TSBCLIQRY 0x15 -#define LST_TRANS_TSBSRVQRY 0x16 +#define LST_TRANS_TSBSTOP (LST_TRANS_PRIVATE | 0x14) +#define LST_TRANS_TSBCLIQRY 0x15 +#define LST_TRANS_TSBSRVQRY 0x16 -#define LST_TRANS_STATQRY 0x21 +#define LST_TRANS_STATQRY 0x21 typedef int (*lstcon_rpc_cond_func_t)(int, struct lstcon_node *, void *); -typedef int (*lstcon_rpc_readent_func_t)(int, srpc_msg_t *, lstcon_rpc_ent_t *); +typedef int (*lstcon_rpc_readent_func_t)(int, srpc_msg_t *, + lstcon_rpc_ent_t __user *); int lstcon_sesrpc_prep(struct lstcon_node *nd, int transop, unsigned version, lstcon_rpc_t **crpc); @@ -128,7 +129,7 @@ int lstcon_rpc_trans_ndlist(struct list_head *ndlist, void lstcon_rpc_trans_stat(lstcon_rpc_trans_t *trans, lstcon_trans_stat_t *stat); int lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans, - struct list_head *head_up, + struct list_head __user *head_up, lstcon_rpc_readent_func_t readent); void lstcon_rpc_trans_abort(lstcon_rpc_trans_t *trans, int error); void lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans); diff --git a/drivers/staging/lustre/lnet/selftest/console.c b/drivers/staging/lustre/lnet/selftest/console.c index 5619fc430..1a923ea3a 100644 --- a/drivers/staging/lustre/lnet/selftest/console.c +++ b/drivers/staging/lustre/lnet/selftest/console.c @@ -49,16 +49,16 @@ do { \ if ((nd)->nd_state == LST_NODE_ACTIVE) \ (p)->nle_nactive++; \ - else if ((nd)->nd_state == LST_NODE_BUSY) \ + else if ((nd)->nd_state == LST_NODE_BUSY) \ (p)->nle_nbusy++; \ - else if ((nd)->nd_state == LST_NODE_DOWN) \ + else if ((nd)->nd_state == LST_NODE_DOWN) \ (p)->nle_ndown++; \ else \ (p)->nle_nunknown++; \ (p)->nle_nnode++; \ } while (0) -lstcon_session_t console_session; +struct lstcon_session console_session; static void lstcon_node_get(lstcon_node_t *nd) @@ -71,12 +71,13 @@ lstcon_node_get(lstcon_node_t *nd) static int lstcon_node_find(lnet_process_id_t id, lstcon_node_t **ndpp, int create) { - lstcon_ndlink_t *ndl; + lstcon_ndlink_t *ndl; unsigned int idx = LNET_NIDADDR(id.nid) % LST_GLOBAL_HASHSIZE; LASSERT(id.nid != LNET_NID_ANY); - list_for_each_entry(ndl, &console_session.ses_ndl_hash[idx], ndl_hlink) { + list_for_each_entry(ndl, &console_session.ses_ndl_hash[idx], + ndl_hlink) { if (ndl->ndl_node->nd_id.nid != id.nid || ndl->ndl_node->nd_id.pid != id.pid) 
continue; @@ -90,23 +91,25 @@ lstcon_node_find(lnet_process_id_t id, lstcon_node_t **ndpp, int create) return -ENOENT; LIBCFS_ALLOC(*ndpp, sizeof(lstcon_node_t) + sizeof(lstcon_ndlink_t)); - if (*ndpp == NULL) + if (!*ndpp) return -ENOMEM; ndl = (lstcon_ndlink_t *)(*ndpp + 1); ndl->ndl_node = *ndpp; - ndl->ndl_node->nd_ref = 1; - ndl->ndl_node->nd_id = id; + ndl->ndl_node->nd_ref = 1; + ndl->ndl_node->nd_id = id; ndl->ndl_node->nd_stamp = cfs_time_current(); ndl->ndl_node->nd_state = LST_NODE_UNKNOWN; ndl->ndl_node->nd_timeout = 0; memset(&ndl->ndl_node->nd_ping, 0, sizeof(lstcon_rpc_t)); - /* queued in global hash & list, no refcount is taken by + /* + * queued in global hash & list, no refcount is taken by * global hash & list; if the caller releases its refcount, - * node will be released */ + * the node will be released + */ list_add_tail(&ndl->ndl_hlink, &console_session.ses_ndl_hash[idx]); list_add_tail(&ndl->ndl_link, &console_session.ses_ndl_list); @@ -157,16 +160,16 @@ lstcon_ndlink_find(struct list_head *hash, return 0; } - if (create == 0) + if (!create) return -ENOENT; /* find or create in session hash */ rc = lstcon_node_find(id, &nd, (create == 1) ? 1 : 0); - if (rc != 0) + if (rc) return rc; LIBCFS_ALLOC(ndl, sizeof(lstcon_ndlink_t)); - if (ndl == NULL) { + if (!ndl) { lstcon_node_put(nd); return -ENOMEM; } @@ -177,7 +180,7 @@ lstcon_ndlink_find(struct list_head *hash, INIT_LIST_HEAD(&ndl->ndl_link); list_add_tail(&ndl->ndl_hlink, &hash[idx]); - return 0; + return 0; } static void @@ -200,12 +203,18 @@ lstcon_group_alloc(char *name, lstcon_group_t **grpp) LIBCFS_ALLOC(grp, offsetof(lstcon_group_t, grp_ndl_hash[LST_NODE_HASHSIZE])); - if (grp == NULL) + if (!grp) return -ENOMEM; grp->grp_ref = 1; - if (name != NULL) - strcpy(grp->grp_name, name); + if (name) { + if (strlen(name) > sizeof(grp->grp_name) - 1) { + LIBCFS_FREE(grp, offsetof(lstcon_group_t, + grp_ndl_hash[LST_NODE_HASHSIZE])); + return -E2BIG; + } + strncpy(grp->grp_name, name, sizeof(grp->grp_name)); + } INIT_LIST_HEAD(&grp->grp_link); INIT_LIST_HEAD(&grp->grp_ndl_list); @@ -234,7 +243,7 @@ lstcon_group_drain(lstcon_group_t *grp, int keep) lstcon_ndlink_t *tmp; list_for_each_entry_safe(ndl, tmp, &grp->grp_ndl_list, ndl_link) { - if ((ndl->ndl_node->nd_state & keep) == 0) + if (!(ndl->ndl_node->nd_state & keep)) lstcon_group_ndlink_release(grp, ndl); } } @@ -252,9 +261,8 @@ lstcon_group_decref(lstcon_group_t *grp) lstcon_group_drain(grp, 0); - for (i = 0; i < LST_NODE_HASHSIZE; i++) { + for (i = 0; i < LST_NODE_HASHSIZE; i++) LASSERT(list_empty(&grp->grp_ndl_hash[i])); - } LIBCFS_FREE(grp, offsetof(lstcon_group_t, grp_ndl_hash[LST_NODE_HASHSIZE])); @@ -266,7 +274,7 @@ lstcon_group_find(const char *name, lstcon_group_t **grpp) lstcon_group_t *grp; list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) { - if (strncmp(grp->grp_name, name, LST_NAME_SIZE) != 0) + if (strncmp(grp->grp_name, name, LST_NAME_SIZE)) continue; lstcon_group_addref(grp); /* +1 ref for caller */ @@ -284,7 +292,7 @@ lstcon_group_ndlink_find(lstcon_group_t *grp, lnet_process_id_t id, int rc; rc = lstcon_ndlink_find(&grp->grp_ndl_hash[0], id, ndlpp, create); - if (rc != 0) + if (rc) return rc; if (!list_empty(&(*ndlpp)->ndl_link)) @@ -309,7 +317,7 @@ lstcon_group_ndlink_move(lstcon_group_t *old, lstcon_group_t *new, lstcon_ndlink_t *ndl) { unsigned int idx = LNET_NIDADDR(ndl->ndl_node->nd_id.nid) % - LST_NODE_HASHSIZE; + LST_NODE_HASHSIZE; list_del(&ndl->ndl_hlink); list_del(&ndl->ndl_link); @@ -327,7 +335,7 @@ lstcon_group_move(lstcon_group_t 
*old, lstcon_group_t *new) while (!list_empty(&old->grp_ndl_list)) { ndl = list_entry(old->grp_ndl_list.next, - lstcon_ndlink_t, ndl_link); + lstcon_ndlink_t, ndl_link); lstcon_group_ndlink_move(old, new, ndl); } } @@ -347,7 +355,7 @@ lstcon_sesrpc_condition(int transop, lstcon_node_t *nd, void *arg) if (nd->nd_state != LST_NODE_ACTIVE) return 0; - if (grp != NULL && nd->nd_ref > 1) + if (grp && nd->nd_ref > 1) return 0; break; @@ -363,7 +371,7 @@ lstcon_sesrpc_condition(int transop, lstcon_node_t *nd, void *arg) static int lstcon_sesrpc_readent(int transop, srpc_msg_t *msg, - lstcon_rpc_ent_t *ent_up) + lstcon_rpc_ent_t __user *ent_up) { srpc_debug_reply_t *rep; @@ -376,9 +384,9 @@ lstcon_sesrpc_readent(int transop, srpc_msg_t *msg, rep = &msg->msg_body.dbg_reply; if (copy_to_user(&ent_up->rpe_priv[0], - &rep->dbg_timeout, sizeof(int)) || + &rep->dbg_timeout, sizeof(int)) || copy_to_user(&ent_up->rpe_payload[0], - &rep->dbg_name, LST_NAME_SIZE)) + &rep->dbg_name, LST_NAME_SIZE)) return -EFAULT; return 0; @@ -392,18 +400,18 @@ lstcon_sesrpc_readent(int transop, srpc_msg_t *msg, static int lstcon_group_nodes_add(lstcon_group_t *grp, - int count, lnet_process_id_t *ids_up, - unsigned *featp, struct list_head *result_up) + int count, lnet_process_id_t __user *ids_up, + unsigned *featp, struct list_head __user *result_up) { lstcon_rpc_trans_t *trans; - lstcon_ndlink_t *ndl; + lstcon_ndlink_t *ndl; lstcon_group_t *tmp; lnet_process_id_t id; int i; int rc; rc = lstcon_group_alloc(NULL, &tmp); - if (rc != 0) { + if (rc) { CERROR("Out of memory\n"); return -ENOMEM; } @@ -416,18 +424,18 @@ lstcon_group_nodes_add(lstcon_group_t *grp, /* skip if it's in this group already */ rc = lstcon_group_ndlink_find(grp, id, &ndl, 0); - if (rc == 0) + if (!rc) continue; /* add to tmp group */ rc = lstcon_group_ndlink_find(tmp, id, &ndl, 1); - if (rc != 0) { + if (rc) { CERROR("Can't create ndlink, out of memory\n"); break; } } - if (rc != 0) { + if (rc) { lstcon_group_decref(tmp); return rc; } @@ -435,7 +443,7 @@ lstcon_group_nodes_add(lstcon_group_t *grp, rc = lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list, &tmp->grp_trans_list, LST_TRANS_SESNEW, tmp, lstcon_sesrpc_condition, &trans); - if (rc != 0) { + if (rc) { CERROR("Can't create transaction: %d\n", rc); lstcon_group_decref(tmp); return rc; @@ -459,8 +467,8 @@ lstcon_group_nodes_add(lstcon_group_t *grp, static int lstcon_group_nodes_remove(lstcon_group_t *grp, - int count, lnet_process_id_t *ids_up, - struct list_head *result_up) + int count, lnet_process_id_t __user *ids_up, + struct list_head __user *result_up) { lstcon_rpc_trans_t *trans; lstcon_ndlink_t *ndl; @@ -472,7 +480,7 @@ lstcon_group_nodes_remove(lstcon_group_t *grp, /* End session and remove node from the group */ rc = lstcon_group_alloc(NULL, &tmp); - if (rc != 0) { + if (rc) { CERROR("Out of memory\n"); return -ENOMEM; } @@ -484,14 +492,14 @@ lstcon_group_nodes_remove(lstcon_group_t *grp, } /* move node to tmp group */ - if (lstcon_group_ndlink_find(grp, id, &ndl, 0) == 0) + if (!lstcon_group_ndlink_find(grp, id, &ndl, 0)) lstcon_group_ndlink_move(grp, tmp, ndl); } rc = lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list, &tmp->grp_trans_list, LST_TRANS_SESEND, tmp, lstcon_sesrpc_condition, &trans); - if (rc != 0) { + if (rc) { CERROR("Can't create transaction: %d\n", rc); goto error; } @@ -518,15 +526,15 @@ lstcon_group_add(char *name) lstcon_group_t *grp; int rc; - rc = (lstcon_group_find(name, &grp) == 0) ? -EEXIST : 0; - if (rc != 0) { + rc = lstcon_group_find(name, &grp) ? 
0 : -EEXIST; + if (rc) { /* find a group with same name */ lstcon_group_decref(grp); return rc; } rc = lstcon_group_alloc(name, &grp); - if (rc != 0) { + if (rc) { CERROR("Can't allocate descriptor for group %s\n", name); return -ENOMEM; } @@ -537,17 +545,17 @@ lstcon_group_add(char *name) } int -lstcon_nodes_add(char *name, int count, lnet_process_id_t *ids_up, - unsigned *featp, struct list_head *result_up) +lstcon_nodes_add(char *name, int count, lnet_process_id_t __user *ids_up, + unsigned *featp, struct list_head __user *result_up) { lstcon_group_t *grp; int rc; LASSERT(count > 0); - LASSERT(ids_up != NULL); + LASSERT(ids_up); rc = lstcon_group_find(name, &grp); - if (rc != 0) { + if (rc) { CDEBUG(D_NET, "Can't find group %s\n", name); return rc; } @@ -575,7 +583,7 @@ lstcon_group_del(char *name) int rc; rc = lstcon_group_find(name, &grp); - if (rc != 0) { + if (rc) { CDEBUG(D_NET, "Can't find group: %s\n", name); return rc; } @@ -590,7 +598,7 @@ lstcon_group_del(char *name) rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list, &grp->grp_trans_list, LST_TRANS_SESEND, grp, lstcon_sesrpc_condition, &trans); - if (rc != 0) { + if (rc) { CERROR("Can't create transaction: %d\n", rc); lstcon_group_decref(grp); return rc; @@ -601,8 +609,10 @@ lstcon_group_del(char *name) lstcon_rpc_trans_destroy(trans); lstcon_group_decref(grp); - /* -ref for session, it's destroyed, - * status can't be rolled back, destroy group anyway */ + /* + * -ref for session, it's destroyed, + * status can't be rolled back, destroy group anyway + */ lstcon_group_decref(grp); return rc; @@ -615,7 +625,7 @@ lstcon_group_clean(char *name, int args) int rc; rc = lstcon_group_find(name, &grp); - if (rc != 0) { + if (rc) { CDEBUG(D_NET, "Can't find group %s\n", name); return rc; } @@ -641,14 +651,14 @@ lstcon_group_clean(char *name, int args) } int -lstcon_nodes_remove(char *name, int count, - lnet_process_id_t *ids_up, struct list_head *result_up) +lstcon_nodes_remove(char *name, int count, lnet_process_id_t __user *ids_up, + struct list_head __user *result_up) { lstcon_group_t *grp = NULL; int rc; rc = lstcon_group_find(name, &grp); - if (rc != 0) { + if (rc) { CDEBUG(D_NET, "Can't find group: %s\n", name); return rc; } @@ -671,14 +681,14 @@ lstcon_nodes_remove(char *name, int count, } int -lstcon_group_refresh(char *name, struct list_head *result_up) +lstcon_group_refresh(char *name, struct list_head __user *result_up) { lstcon_rpc_trans_t *trans; lstcon_group_t *grp; int rc; rc = lstcon_group_find(name, &grp); - if (rc != 0) { + if (rc) { CDEBUG(D_NET, "Can't find group: %s\n", name); return rc; } @@ -694,7 +704,7 @@ lstcon_group_refresh(char *name, struct list_head *result_up) rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list, &grp->grp_trans_list, LST_TRANS_SESNEW, grp, lstcon_sesrpc_condition, &trans); - if (rc != 0) { + if (rc) { /* local error, return */ CDEBUG(D_NET, "Can't create transaction: %d\n", rc); lstcon_group_decref(grp); @@ -713,15 +723,15 @@ lstcon_group_refresh(char *name, struct list_head *result_up) } int -lstcon_group_list(int index, int len, char *name_up) +lstcon_group_list(int index, int len, char __user *name_up) { lstcon_group_t *grp; LASSERT(index >= 0); - LASSERT(name_up != NULL); + LASSERT(name_up); list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) { - if (index-- == 0) { + if (!index--) { return copy_to_user(name_up, grp->grp_name, len) ? 
-EFAULT : 0; } @@ -732,15 +742,15 @@ lstcon_group_list(int index, int len, char *name_up) static int lstcon_nodes_getent(struct list_head *head, int *index_p, - int *count_p, lstcon_node_ent_t *dents_up) + int *count_p, lstcon_node_ent_t __user *dents_up) { lstcon_ndlink_t *ndl; lstcon_node_t *nd; int count = 0; int index = 0; - LASSERT(index_p != NULL && count_p != NULL); - LASSERT(dents_up != NULL); + LASSERT(index_p && count_p); + LASSERT(dents_up); LASSERT(*index_p >= 0); LASSERT(*count_p > 0); @@ -753,9 +763,9 @@ lstcon_nodes_getent(struct list_head *head, int *index_p, nd = ndl->ndl_node; if (copy_to_user(&dents_up[count].nde_id, - &nd->nd_id, sizeof(nd->nd_id)) || + &nd->nd_id, sizeof(nd->nd_id)) || copy_to_user(&dents_up[count].nde_state, - &nd->nd_state, sizeof(nd->nd_state))) + &nd->nd_state, sizeof(nd->nd_state))) return -EFAULT; count++; @@ -771,8 +781,9 @@ lstcon_nodes_getent(struct list_head *head, int *index_p, } int -lstcon_group_info(char *name, lstcon_ndlist_ent_t *gents_p, - int *index_p, int *count_p, lstcon_node_ent_t *dents_up) +lstcon_group_info(char *name, lstcon_ndlist_ent_t __user *gents_p, + int *index_p, int *count_p, + lstcon_node_ent_t __user *dents_up) { lstcon_ndlist_ent_t *gentp; lstcon_group_t *grp; @@ -780,7 +791,7 @@ lstcon_group_info(char *name, lstcon_ndlist_ent_t *gents_p, int rc; rc = lstcon_group_find(name, &grp); - if (rc != 0) { + if (rc) { CDEBUG(D_NET, "Can't find group %s\n", name); return rc; } @@ -796,7 +807,7 @@ lstcon_group_info(char *name, lstcon_ndlist_ent_t *gents_p, /* non-verbose query */ LIBCFS_ALLOC(gentp, sizeof(lstcon_ndlist_ent_t)); - if (gentp == NULL) { + if (!gentp) { CERROR("Can't allocate ndlist_ent\n"); lstcon_group_decref(grp); @@ -807,7 +818,7 @@ lstcon_group_info(char *name, lstcon_ndlist_ent_t *gents_p, LST_NODE_STATE_COUNTER(ndl->ndl_node, gentp); rc = copy_to_user(gents_p, gentp, - sizeof(lstcon_ndlist_ent_t)) ? -EFAULT : 0; + sizeof(lstcon_ndlist_ent_t)) ? -EFAULT : 0; LIBCFS_FREE(gentp, sizeof(lstcon_ndlist_ent_t)); @@ -822,7 +833,7 @@ lstcon_batch_find(const char *name, lstcon_batch_t **batpp) lstcon_batch_t *bat; list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) { - if (strncmp(bat->bat_name, name, LST_NAME_SIZE) == 0) { + if (!strncmp(bat->bat_name, name, LST_NAME_SIZE)) { *batpp = bat; return 0; } @@ -838,21 +849,21 @@ lstcon_batch_add(char *name) int i; int rc; - rc = (lstcon_batch_find(name, &bat) == 0) ? -EEXIST : 0; - if (rc != 0) { + rc = !lstcon_batch_find(name, &bat) ? 
-EEXIST : 0; + if (rc) { CDEBUG(D_NET, "Batch %s already exists\n", name); return rc; } LIBCFS_ALLOC(bat, sizeof(lstcon_batch_t)); - if (bat == NULL) { + if (!bat) { CERROR("Can't allocate descriptor for batch %s\n", name); return -ENOMEM; } LIBCFS_ALLOC(bat->bat_cli_hash, sizeof(struct list_head) * LST_NODE_HASHSIZE); - if (bat->bat_cli_hash == NULL) { + if (!bat->bat_cli_hash) { CERROR("Can't allocate hash for batch %s\n", name); LIBCFS_FREE(bat, sizeof(lstcon_batch_t)); @@ -861,7 +872,7 @@ lstcon_batch_add(char *name) LIBCFS_ALLOC(bat->bat_srv_hash, sizeof(struct list_head) * LST_NODE_HASHSIZE); - if (bat->bat_srv_hash == NULL) { + if (!bat->bat_srv_hash) { CERROR("Can't allocate hash for batch %s\n", name); LIBCFS_FREE(bat->bat_cli_hash, LST_NODE_HASHSIZE); LIBCFS_FREE(bat, sizeof(lstcon_batch_t)); @@ -869,7 +880,13 @@ lstcon_batch_add(char *name) return -ENOMEM; } - strcpy(bat->bat_name, name); + if (strlen(name) > sizeof(bat->bat_name) - 1) { + LIBCFS_FREE(bat->bat_srv_hash, LST_NODE_HASHSIZE); + LIBCFS_FREE(bat->bat_cli_hash, LST_NODE_HASHSIZE); + LIBCFS_FREE(bat, sizeof(lstcon_batch_t)); + return -E2BIG; + } + strncpy(bat->bat_name, name, sizeof(bat->bat_name)); bat->bat_hdr.tsb_index = 0; bat->bat_hdr.tsb_id.bat_id = ++console_session.ses_id_cookie; @@ -892,17 +909,17 @@ lstcon_batch_add(char *name) } int -lstcon_batch_list(int index, int len, char *name_up) +lstcon_batch_list(int index, int len, char __user *name_up) { lstcon_batch_t *bat; - LASSERT(name_up != NULL); + LASSERT(name_up); LASSERT(index >= 0); list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) { - if (index-- == 0) { + if (!index--) { return copy_to_user(name_up, bat->bat_name, len) ? - -EFAULT : 0; + -EFAULT : 0; } } @@ -910,20 +927,20 @@ lstcon_batch_list(int index, int len, char *name_up) } int -lstcon_batch_info(char *name, lstcon_test_batch_ent_t *ent_up, int server, - int testidx, int *index_p, int *ndent_p, - lstcon_node_ent_t *dents_up) +lstcon_batch_info(char *name, lstcon_test_batch_ent_t __user *ent_up, + int server, int testidx, int *index_p, int *ndent_p, + lstcon_node_ent_t __user *dents_up) { lstcon_test_batch_ent_t *entp; struct list_head *clilst; struct list_head *srvlst; lstcon_test_t *test = NULL; lstcon_batch_t *bat; - lstcon_ndlink_t *ndl; + lstcon_ndlink_t *ndl; int rc; rc = lstcon_batch_find(name, &bat); - if (rc != 0) { + if (rc) { CDEBUG(D_NET, "Can't find batch %s\n", name); return -ENOENT; } @@ -941,12 +958,12 @@ lstcon_batch_info(char *name, lstcon_test_batch_ent_t *ent_up, int server, } } - clilst = (test == NULL) ? &bat->bat_cli_list : - &test->tes_src_grp->grp_ndl_list; - srvlst = (test == NULL) ? &bat->bat_srv_list : - &test->tes_dst_grp->grp_ndl_list; + clilst = !test ? &bat->bat_cli_list : + &test->tes_src_grp->grp_ndl_list; + srvlst = !test ? &bat->bat_srv_list : + &test->tes_dst_grp->grp_ndl_list; - if (dents_up != NULL) { + if (dents_up) { rc = lstcon_nodes_getent((server ? 
srvlst : clilst), index_p, ndent_p, dents_up); return rc; @@ -954,17 +971,16 @@ lstcon_batch_info(char *name, lstcon_test_batch_ent_t *ent_up, int server, /* non-verbose query */ LIBCFS_ALLOC(entp, sizeof(lstcon_test_batch_ent_t)); - if (entp == NULL) + if (!entp) return -ENOMEM; - if (test == NULL) { + if (!test) { entp->u.tbe_batch.bae_ntest = bat->bat_ntest; entp->u.tbe_batch.bae_state = bat->bat_state; } else { - - entp->u.tbe_test.tse_type = test->tes_type; - entp->u.tbe_test.tse_loop = test->tes_loop; + entp->u.tbe_test.tse_type = test->tes_type; + entp->u.tbe_test.tse_loop = test->tes_loop; entp->u.tbe_test.tse_concur = test->tes_concur; } @@ -975,7 +991,7 @@ lstcon_batch_info(char *name, lstcon_test_batch_ent_t *ent_up, int server, LST_NODE_STATE_COUNTER(ndl->ndl_node, &entp->tbe_srv_nle); rc = copy_to_user(ent_up, entp, - sizeof(lstcon_test_batch_ent_t)) ? -EFAULT : 0; + sizeof(lstcon_test_batch_ent_t)) ? -EFAULT : 0; LIBCFS_FREE(entp, sizeof(lstcon_test_batch_ent_t)); @@ -1006,7 +1022,7 @@ lstcon_batrpc_condition(int transop, lstcon_node_t *nd, void *arg) static int lstcon_batch_op(lstcon_batch_t *bat, int transop, - struct list_head *result_up) + struct list_head __user *result_up) { lstcon_rpc_trans_t *trans; int rc; @@ -1014,7 +1030,7 @@ lstcon_batch_op(lstcon_batch_t *bat, int transop, rc = lstcon_rpc_trans_ndlist(&bat->bat_cli_list, &bat->bat_trans_list, transop, bat, lstcon_batrpc_condition, &trans); - if (rc != 0) { + if (rc) { CERROR("Can't create transaction: %d\n", rc); return rc; } @@ -1029,12 +1045,12 @@ lstcon_batch_op(lstcon_batch_t *bat, int transop, } int -lstcon_batch_run(char *name, int timeout, struct list_head *result_up) +lstcon_batch_run(char *name, int timeout, struct list_head __user *result_up) { lstcon_batch_t *bat; int rc; - if (lstcon_batch_find(name, &bat) != 0) { + if (lstcon_batch_find(name, &bat)) { CDEBUG(D_NET, "Can't find batch %s\n", name); return -ENOENT; } @@ -1044,19 +1060,19 @@ lstcon_batch_run(char *name, int timeout, struct list_head *result_up) rc = lstcon_batch_op(bat, LST_TRANS_TSBRUN, result_up); /* mark batch as running if it's started in any node */ - if (lstcon_tsbop_stat_success(lstcon_trans_stat(), 0) != 0) + if (lstcon_tsbop_stat_success(lstcon_trans_stat(), 0)) bat->bat_state = LST_BATCH_RUNNING; return rc; } int -lstcon_batch_stop(char *name, int force, struct list_head *result_up) +lstcon_batch_stop(char *name, int force, struct list_head __user *result_up) { lstcon_batch_t *bat; int rc; - if (lstcon_batch_find(name, &bat) != 0) { + if (lstcon_batch_find(name, &bat)) { CDEBUG(D_NET, "Can't find batch %s\n", name); return -ENOENT; } @@ -1066,7 +1082,7 @@ lstcon_batch_stop(char *name, int force, struct list_head *result_up) rc = lstcon_batch_op(bat, LST_TRANS_TSBSTOP, result_up); /* mark batch as stopped if all RPCs finished */ - if (lstcon_tsbop_stat_failure(lstcon_trans_stat(), 0) == 0) + if (!lstcon_tsbop_stat_failure(lstcon_trans_stat(), 0)) bat->bat_state = LST_BATCH_IDLE; return rc; @@ -1083,7 +1099,7 @@ lstcon_batch_destroy(lstcon_batch_t *bat) while (!list_empty(&bat->bat_test_list)) { test = list_entry(bat->bat_test_list.next, - lstcon_test_t, tes_link); + lstcon_test_t, tes_link); LASSERT(list_empty(&test->tes_trans_list)); list_del(&test->tes_link); @@ -1099,7 +1115,7 @@ lstcon_batch_destroy(lstcon_batch_t *bat) while (!list_empty(&bat->bat_cli_list)) { ndl = list_entry(bat->bat_cli_list.next, - lstcon_ndlink_t, ndl_link); + lstcon_ndlink_t, ndl_link); list_del_init(&ndl->ndl_link); lstcon_ndlink_release(ndl); @@ 
-1107,7 +1123,7 @@ lstcon_batch_destroy(lstcon_batch_t *bat) while (!list_empty(&bat->bat_srv_list)) { ndl = list_entry(bat->bat_srv_list.next, - lstcon_ndlink_t, ndl_link); + lstcon_ndlink_t, ndl_link); list_del_init(&ndl->ndl_link); lstcon_ndlink_release(ndl); @@ -1135,10 +1151,10 @@ lstcon_testrpc_condition(int transop, lstcon_node_t *nd, void *arg) struct list_head *head; test = (lstcon_test_t *)arg; - LASSERT(test != NULL); + LASSERT(test); batch = test->tes_batch; - LASSERT(batch != NULL); + LASSERT(batch); if (test->tes_oneside && transop == LST_TRANS_TSBSRVADD) @@ -1160,7 +1176,7 @@ lstcon_testrpc_condition(int transop, lstcon_node_t *nd, void *arg) LASSERT(nd->nd_id.nid != LNET_NID_ANY); - if (lstcon_ndlink_find(hash, nd->nd_id, &ndl, 1) != 0) + if (lstcon_ndlink_find(hash, nd->nd_id, &ndl, 1)) return -ENOMEM; if (list_empty(&ndl->ndl_link)) @@ -1170,31 +1186,31 @@ lstcon_testrpc_condition(int transop, lstcon_node_t *nd, void *arg) } static int -lstcon_test_nodes_add(lstcon_test_t *test, struct list_head *result_up) +lstcon_test_nodes_add(lstcon_test_t *test, struct list_head __user *result_up) { lstcon_rpc_trans_t *trans; lstcon_group_t *grp; int transop; int rc; - LASSERT(test->tes_src_grp != NULL); - LASSERT(test->tes_dst_grp != NULL); + LASSERT(test->tes_src_grp); + LASSERT(test->tes_dst_grp); transop = LST_TRANS_TSBSRVADD; - grp = test->tes_dst_grp; + grp = test->tes_dst_grp; again: rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list, &test->tes_trans_list, transop, test, lstcon_testrpc_condition, &trans); - if (rc != 0) { + if (rc) { CERROR("Can't create transaction: %d\n", rc); return rc; } lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - if (lstcon_trans_stat()->trs_rpc_errno != 0 || - lstcon_trans_stat()->trs_fwk_errno != 0) { + if (lstcon_trans_stat()->trs_rpc_errno || + lstcon_trans_stat()->trs_fwk_errno) { lstcon_rpc_trans_interpreter(trans, result_up, NULL); lstcon_rpc_trans_destroy(trans); @@ -1226,7 +1242,7 @@ lstcon_verify_batch(const char *name, lstcon_batch_t **batch) int rc; rc = lstcon_batch_find(name, batch); - if (rc != 0) { + if (rc) { CDEBUG(D_NET, "Can't find batch %s\n", name); return rc; } @@ -1243,10 +1259,10 @@ static int lstcon_verify_group(const char *name, lstcon_group_t **grp) { int rc; - lstcon_ndlink_t *ndl; + lstcon_ndlink_t *ndl; rc = lstcon_group_find(name, grp); - if (rc != 0) { + if (rc) { CDEBUG(D_NET, "can't find group %s\n", name); return rc; } @@ -1266,13 +1282,13 @@ lstcon_test_add(char *batch_name, int type, int loop, int concur, int dist, int span, char *src_name, char *dst_name, void *param, int paramlen, int *retp, - struct list_head *result_up) + struct list_head __user *result_up) { - lstcon_test_t *test = NULL; - int rc; - lstcon_group_t *src_grp = NULL; - lstcon_group_t *dst_grp = NULL; - lstcon_batch_t *batch = NULL; + lstcon_test_t *test = NULL; + int rc; + lstcon_group_t *src_grp = NULL; + lstcon_group_t *dst_grp = NULL; + lstcon_batch_t *batch = NULL; /* * verify that a batch of the given name exists, and the groups @@ -1280,15 +1296,15 @@ lstcon_test_add(char *batch_name, int type, int loop, * active node */ rc = lstcon_verify_batch(batch_name, &batch); - if (rc != 0) + if (rc) goto out; rc = lstcon_verify_group(src_name, &src_grp); - if (rc != 0) + if (rc) goto out; rc = lstcon_verify_group(dst_name, &dst_grp); - if (rc != 0) + if (rc) goto out; if (dst_grp->grp_userland) @@ -1302,32 +1318,32 @@ lstcon_test_add(char *batch_name, int type, int loop, goto out; } - test->tes_hdr.tsb_id = batch->bat_hdr.tsb_id; - 
test->tes_batch = batch; - test->tes_type = type; - test->tes_oneside = 0; /* TODO */ - test->tes_loop = loop; - test->tes_concur = concur; - test->tes_stop_onerr = 1; /* TODO */ - test->tes_span = span; - test->tes_dist = dist; - test->tes_cliidx = 0; /* just used for creating RPC */ - test->tes_src_grp = src_grp; - test->tes_dst_grp = dst_grp; + test->tes_hdr.tsb_id = batch->bat_hdr.tsb_id; + test->tes_batch = batch; + test->tes_type = type; + test->tes_oneside = 0; /* TODO */ + test->tes_loop = loop; + test->tes_concur = concur; + test->tes_stop_onerr = 1; /* TODO */ + test->tes_span = span; + test->tes_dist = dist; + test->tes_cliidx = 0; /* just used for creating RPC */ + test->tes_src_grp = src_grp; + test->tes_dst_grp = dst_grp; INIT_LIST_HEAD(&test->tes_trans_list); - if (param != NULL) { + if (param) { test->tes_paramlen = paramlen; memcpy(&test->tes_param[0], param, paramlen); } rc = lstcon_test_nodes_add(test, result_up); - if (rc != 0) + if (rc) goto out; - if (lstcon_trans_stat()->trs_rpc_errno != 0 || - lstcon_trans_stat()->trs_fwk_errno != 0) + if (lstcon_trans_stat()->trs_rpc_errno || + lstcon_trans_stat()->trs_fwk_errno) CDEBUG(D_NET, "Failed to add test %d to batch %s\n", type, batch_name); @@ -1340,13 +1356,13 @@ lstcon_test_add(char *batch_name, int type, int loop, /* hold groups so nobody can change them */ return rc; out: - if (test != NULL) + if (test) LIBCFS_FREE(test, offsetof(lstcon_test_t, tes_param[paramlen])); - if (dst_grp != NULL) + if (dst_grp) lstcon_group_decref(dst_grp); - if (src_grp != NULL) + if (src_grp) lstcon_group_decref(src_grp); return rc; @@ -1369,16 +1385,16 @@ lstcon_test_find(lstcon_batch_t *batch, int idx, lstcon_test_t **testpp) static int lstcon_tsbrpc_readent(int transop, srpc_msg_t *msg, - lstcon_rpc_ent_t *ent_up) + lstcon_rpc_ent_t __user *ent_up) { srpc_batch_reply_t *rep = &msg->msg_body.bat_reply; LASSERT(transop == LST_TRANS_TSBCLIQRY || - transop == LST_TRANS_TSBSRVQRY); + transop == LST_TRANS_TSBSRVQRY); /* positive errno, framework error code */ - if (copy_to_user(&ent_up->rpe_priv[0], - &rep->bar_active, sizeof(rep->bar_active))) + if (copy_to_user(&ent_up->rpe_priv[0], &rep->bar_active, + sizeof(rep->bar_active))) return -EFAULT; return 0; @@ -1386,7 +1402,7 @@ lstcon_tsbrpc_readent(int transop, srpc_msg_t *msg, int lstcon_test_batch_query(char *name, int testidx, int client, - int timeout, struct list_head *result_up) + int timeout, struct list_head __user *result_up) { lstcon_rpc_trans_t *trans; struct list_head *translist; @@ -1398,43 +1414,43 @@ lstcon_test_batch_query(char *name, int testidx, int client, int rc; rc = lstcon_batch_find(name, &batch); - if (rc != 0) { + if (rc) { CDEBUG(D_NET, "Can't find batch: %s\n", name); return rc; } - if (testidx == 0) { + if (!testidx) { translist = &batch->bat_trans_list; - ndlist = &batch->bat_cli_list; - hdr = &batch->bat_hdr; + ndlist = &batch->bat_cli_list; + hdr = &batch->bat_hdr; } else { /* query specified test only */ rc = lstcon_test_find(batch, testidx, &test); - if (rc != 0) { + if (rc) { CDEBUG(D_NET, "Can't find test: %d\n", testidx); return rc; } translist = &test->tes_trans_list; - ndlist = &test->tes_src_grp->grp_ndl_list; - hdr = &test->tes_hdr; + ndlist = &test->tes_src_grp->grp_ndl_list; + hdr = &test->tes_hdr; } transop = client ? 
LST_TRANS_TSBCLIQRY : LST_TRANS_TSBSRVQRY; rc = lstcon_rpc_trans_ndlist(ndlist, translist, transop, hdr, lstcon_batrpc_condition, &trans); - if (rc != 0) { + if (rc) { CERROR("Can't create transaction: %d\n", rc); return rc; } lstcon_rpc_trans_postwait(trans, timeout); - if (testidx == 0 && /* query a batch, not a test */ - lstcon_rpc_stat_failure(lstcon_trans_stat(), 0) == 0 && - lstcon_tsbqry_stat_run(lstcon_trans_stat(), 0) == 0) { + if (!testidx && /* query a batch, not a test */ + !lstcon_rpc_stat_failure(lstcon_trans_stat(), 0) && + !lstcon_tsbqry_stat_run(lstcon_trans_stat(), 0)) { /* all RPCs finished, and no active test */ batch->bat_state = LST_BATCH_IDLE; } @@ -1448,19 +1464,19 @@ lstcon_test_batch_query(char *name, int testidx, int client, static int lstcon_statrpc_readent(int transop, srpc_msg_t *msg, - lstcon_rpc_ent_t *ent_up) + lstcon_rpc_ent_t __user *ent_up) { srpc_stat_reply_t *rep = &msg->msg_body.stat_reply; - sfw_counters_t *sfwk_stat; - srpc_counters_t *srpc_stat; - lnet_counters_t *lnet_stat; + sfw_counters_t __user *sfwk_stat; + srpc_counters_t __user *srpc_stat; + lnet_counters_t __user *lnet_stat; - if (rep->str_status != 0) + if (rep->str_status) return 0; - sfwk_stat = (sfw_counters_t *)&ent_up->rpe_payload[0]; - srpc_stat = (srpc_counters_t *)((char *)sfwk_stat + sizeof(*sfwk_stat)); - lnet_stat = (lnet_counters_t *)((char *)srpc_stat + sizeof(*srpc_stat)); + sfwk_stat = (sfw_counters_t __user *)&ent_up->rpe_payload[0]; + srpc_stat = (srpc_counters_t __user *)(sfwk_stat + 1); + lnet_stat = (lnet_counters_t __user *)(srpc_stat + 1); if (copy_to_user(sfwk_stat, &rep->str_fw, sizeof(*sfwk_stat)) || copy_to_user(srpc_stat, &rep->str_rpc, sizeof(*srpc_stat)) || @@ -1472,7 +1488,7 @@ lstcon_statrpc_readent(int transop, srpc_msg_t *msg, static int lstcon_ndlist_stat(struct list_head *ndlist, - int timeout, struct list_head *result_up) + int timeout, struct list_head __user *result_up) { struct list_head head; lstcon_rpc_trans_t *trans; @@ -1482,7 +1498,7 @@ lstcon_ndlist_stat(struct list_head *ndlist, rc = lstcon_rpc_trans_ndlist(ndlist, &head, LST_TRANS_STATQRY, NULL, NULL, &trans); - if (rc != 0) { + if (rc) { CERROR("Can't create transaction: %d\n", rc); return rc; } @@ -1497,13 +1513,14 @@ lstcon_ndlist_stat(struct list_head *ndlist, } int -lstcon_group_stat(char *grp_name, int timeout, struct list_head *result_up) +lstcon_group_stat(char *grp_name, int timeout, + struct list_head __user *result_up) { lstcon_group_t *grp; int rc; rc = lstcon_group_find(grp_name, &grp); - if (rc != 0) { + if (rc) { CDEBUG(D_NET, "Can't find group %s\n", grp_name); return rc; } @@ -1516,17 +1533,17 @@ lstcon_group_stat(char *grp_name, int timeout, struct list_head *result_up) } int -lstcon_nodes_stat(int count, lnet_process_id_t *ids_up, - int timeout, struct list_head *result_up) +lstcon_nodes_stat(int count, lnet_process_id_t __user *ids_up, + int timeout, struct list_head __user *result_up) { - lstcon_ndlink_t *ndl; + lstcon_ndlink_t *ndl; lstcon_group_t *tmp; lnet_process_id_t id; int i; int rc; rc = lstcon_group_alloc(NULL, &tmp); - if (rc != 0) { + if (rc) { CERROR("Out of memory\n"); return -ENOMEM; } @@ -1539,7 +1556,7 @@ lstcon_nodes_stat(int count, lnet_process_id_t *ids_up, /* add to tmp group */ rc = lstcon_group_ndlink_find(tmp, id, &ndl, 2); - if (rc != 0) { + if (rc) { CDEBUG((rc == -ENOMEM) ? 
D_ERROR : D_NET, "Failed to find or create %s: %d\n", libcfs_id2str(id), rc); @@ -1547,7 +1564,7 @@ lstcon_nodes_stat(int count, lnet_process_id_t *ids_up, } } - if (rc != 0) { + if (rc) { lstcon_group_decref(tmp); return rc; } @@ -1562,14 +1579,14 @@ lstcon_nodes_stat(int count, lnet_process_id_t *ids_up, static int lstcon_debug_ndlist(struct list_head *ndlist, struct list_head *translist, - int timeout, struct list_head *result_up) + int timeout, struct list_head __user *result_up) { lstcon_rpc_trans_t *trans; - int rc; + int rc; rc = lstcon_rpc_trans_ndlist(ndlist, translist, LST_TRANS_SESQRY, NULL, lstcon_sesrpc_condition, &trans); - if (rc != 0) { + if (rc) { CERROR("Can't create transaction: %d\n", rc); return rc; } @@ -1584,7 +1601,7 @@ lstcon_debug_ndlist(struct list_head *ndlist, } int -lstcon_session_debug(int timeout, struct list_head *result_up) +lstcon_session_debug(int timeout, struct list_head __user *result_up) { return lstcon_debug_ndlist(&console_session.ses_ndl_list, NULL, timeout, result_up); @@ -1592,13 +1609,13 @@ lstcon_session_debug(int timeout, struct list_head *result_up) int lstcon_batch_debug(int timeout, char *name, - int client, struct list_head *result_up) + int client, struct list_head __user *result_up) { lstcon_batch_t *bat; int rc; rc = lstcon_batch_find(name, &bat); - if (rc != 0) + if (rc) return -ENOENT; rc = lstcon_debug_ndlist(client ? &bat->bat_cli_list : @@ -1610,13 +1627,13 @@ lstcon_batch_debug(int timeout, char *name, int lstcon_group_debug(int timeout, char *name, - struct list_head *result_up) + struct list_head __user *result_up) { lstcon_group_t *grp; int rc; rc = lstcon_group_find(name, &grp); - if (rc != 0) + if (rc) return -ENOENT; rc = lstcon_debug_ndlist(&grp->grp_ndl_list, NULL, @@ -1628,8 +1645,8 @@ lstcon_group_debug(int timeout, char *name, int lstcon_nodes_debug(int timeout, - int count, lnet_process_id_t *ids_up, - struct list_head *result_up) + int count, lnet_process_id_t __user *ids_up, + struct list_head __user *result_up) { lnet_process_id_t id; lstcon_ndlink_t *ndl; @@ -1638,7 +1655,7 @@ lstcon_nodes_debug(int timeout, int rc; rc = lstcon_group_alloc(NULL, &grp); - if (rc != 0) { + if (rc) { CDEBUG(D_NET, "Out of memory\n"); return rc; } @@ -1651,13 +1668,13 @@ lstcon_nodes_debug(int timeout, /* node is added to tmp group */ rc = lstcon_group_ndlink_find(grp, id, &ndl, 1); - if (rc != 0) { + if (rc) { CERROR("Can't create node link\n"); break; } } - if (rc != 0) { + if (rc) { lstcon_group_decref(grp); return rc; } @@ -1673,8 +1690,8 @@ lstcon_nodes_debug(int timeout, int lstcon_session_match(lst_sid_t sid) { - return (console_session.ses_id.ses_nid == sid.ses_nid && - console_session.ses_id.ses_stamp == sid.ses_stamp) ? 1 : 0; + return (console_session.ses_id.ses_nid == sid.ses_nid && + console_session.ses_id.ses_stamp == sid.ses_stamp) ? 
1 : 0; } static void @@ -1685,15 +1702,13 @@ lstcon_new_session_id(lst_sid_t *sid) { LASSERT(console_session.ses_state == LST_SESSION_NONE); LNetGetId(1, &id); - sid->ses_nid = id.nid; + sid->ses_nid = id.nid; sid->ses_stamp = cfs_time_current(); } -extern srpc_service_t lstcon_acceptor_service; - int lstcon_session_new(char *name, int key, unsigned feats, - int timeout, int force, lst_sid_t *sid_up) + int timeout, int force, lst_sid_t __user *sid_up) { int rc = 0; int i; @@ -1709,11 +1724,11 @@ lstcon_session_new(char *name, int key, unsigned feats, rc = lstcon_session_end(); /* lstcon_session_end() only returns local errors */ - if (rc != 0) + if (rc) return rc; } - if ((feats & ~LST_FEATS_MASK) != 0) { + if (feats & ~LST_FEATS_MASK) { CNETERR("Unknown session features %x\n", (feats & ~LST_FEATS_MASK)); return -EINVAL; @@ -1731,15 +1746,18 @@ lstcon_session_new(char *name, int key, unsigned feats, console_session.ses_feats_updated = 0; console_session.ses_timeout = (timeout <= 0) ? LST_CONSOLE_TIMEOUT : timeout; - strlcpy(console_session.ses_name, name, + + if (strlen(name) > sizeof(console_session.ses_name) - 1) + return -E2BIG; + strncpy(console_session.ses_name, name, sizeof(console_session.ses_name)); rc = lstcon_batch_add(LST_DEFAULT_BATCH); - if (rc != 0) + if (rc) return rc; rc = lstcon_rpc_pinger_start(); - if (rc != 0) { + if (rc) { lstcon_batch_t *bat = NULL; lstcon_batch_find(LST_DEFAULT_BATCH, &bat); @@ -1748,8 +1766,8 @@ lstcon_session_new(char *name, int key, unsigned feats, return rc; } - if (copy_to_user(sid_up, &console_session.ses_id, - sizeof(lst_sid_t)) == 0) + if (!copy_to_user(sid_up, &console_session.ses_id, + sizeof(lst_sid_t))) return rc; lstcon_session_end(); @@ -1758,8 +1776,10 @@ lstcon_session_new(char *name, int key, unsigned feats, int -lstcon_session_info(lst_sid_t *sid_up, int *key_up, unsigned *featp, - lstcon_ndlist_ent_t *ndinfo_up, char *name_up, int len) +lstcon_session_info(lst_sid_t __user *sid_up, int __user *key_up, + unsigned __user *featp, + lstcon_ndlist_ent_t __user *ndinfo_up, + char __user *name_up, int len) { lstcon_ndlist_ent_t *entp; lstcon_ndlink_t *ndl; @@ -1769,18 +1789,18 @@ lstcon_session_info(lst_sid_t *sid_up, int *key_up, unsigned *featp, return -ESRCH; LIBCFS_ALLOC(entp, sizeof(*entp)); - if (entp == NULL) + if (!entp) return -ENOMEM; list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link) LST_NODE_STATE_COUNTER(ndl->ndl_node, entp); if (copy_to_user(sid_up, &console_session.ses_id, - sizeof(lst_sid_t)) || + sizeof(lst_sid_t)) || copy_to_user(key_up, &console_session.ses_key, - sizeof(*key_up)) || + sizeof(*key_up)) || copy_to_user(featp, &console_session.ses_features, - sizeof(*featp)) || + sizeof(*featp)) || copy_to_user(ndinfo_up, entp, sizeof(*entp)) || copy_to_user(name_up, console_session.ses_name, len)) rc = -EFAULT; @@ -1803,7 +1823,7 @@ lstcon_session_end(void) rc = lstcon_rpc_trans_ndlist(&console_session.ses_ndl_list, NULL, LST_TRANS_SESEND, NULL, lstcon_sesrpc_condition, &trans); - if (rc != 0) { + if (rc) { CERROR("Can't create transaction: %d\n", rc); return rc; } @@ -1820,16 +1840,16 @@ lstcon_session_end(void) /* waiting for orphan rpcs to die */ lstcon_rpc_cleanup_wait(); - console_session.ses_id = LST_INVALID_SID; + console_session.ses_id = LST_INVALID_SID; console_session.ses_state = LST_SESSION_NONE; - console_session.ses_key = 0; + console_session.ses_key = 0; console_session.ses_force = 0; console_session.ses_feats_updated = 0; /* destroy all batches */ while 
(!list_empty(&console_session.ses_bat_list)) { bat = list_entry(console_session.ses_bat_list.next, - lstcon_batch_t, bat_link); + lstcon_batch_t, bat_link); lstcon_batch_destroy(bat); } @@ -1837,7 +1857,7 @@ lstcon_session_end(void) /* destroy all groups */ while (!list_empty(&console_session.ses_grp_list)) { grp = list_entry(console_session.ses_grp_list.next, - lstcon_group_t, grp_link); + lstcon_group_t, grp_link); LASSERT(grp->grp_ref == 1); lstcon_group_decref(grp); @@ -1847,7 +1867,7 @@ lstcon_session_end(void) LASSERT(list_empty(&console_session.ses_ndl_list)); console_session.ses_shutdown = 0; - console_session.ses_expired = 0; + console_session.ses_expired = 0; return rc; } @@ -1857,7 +1877,7 @@ lstcon_session_feats_check(unsigned feats) { int rc = 0; - if ((feats & ~LST_FEATS_MASK) != 0) { + if (feats & ~LST_FEATS_MASK) { CERROR("Can't support these features: %x\n", (feats & ~LST_FEATS_MASK)); return -EPROTO; @@ -1875,7 +1895,7 @@ lstcon_session_feats_check(unsigned feats) spin_unlock(&console_session.ses_rpc_lock); - if (rc != 0) { + if (rc) { CERROR("remote features %x do not match with session features %x of console\n", feats, console_session.ses_features); } @@ -1886,13 +1906,13 @@ lstcon_session_feats_check(unsigned feats) static int lstcon_acceptor_handle(struct srpc_server_rpc *rpc) { - srpc_msg_t *rep = &rpc->srpc_replymsg; - srpc_msg_t *req = &rpc->srpc_reqstbuf->buf_msg; + srpc_msg_t *rep = &rpc->srpc_replymsg; + srpc_msg_t *req = &rpc->srpc_reqstbuf->buf_msg; srpc_join_reqst_t *jreq = &req->msg_body.join_reqst; srpc_join_reply_t *jrep = &rep->msg_body.join_reply; - lstcon_group_t *grp = NULL; + lstcon_group_t *grp = NULL; lstcon_ndlink_t *ndl; - int rc = 0; + int rc = 0; sfw_unpack_message(req); @@ -1905,26 +1925,26 @@ lstcon_acceptor_handle(struct srpc_server_rpc *rpc) goto out; } - if (lstcon_session_feats_check(req->msg_ses_feats) != 0) { + if (lstcon_session_feats_check(req->msg_ses_feats)) { jrep->join_status = EPROTO; goto out; } if (jreq->join_sid.ses_nid != LNET_NID_ANY && - !lstcon_session_match(jreq->join_sid)) { + !lstcon_session_match(jreq->join_sid)) { jrep->join_status = EBUSY; goto out; } - if (lstcon_group_find(jreq->join_group, &grp) != 0) { + if (lstcon_group_find(jreq->join_group, &grp)) { rc = lstcon_group_alloc(jreq->join_group, &grp); - if (rc != 0) { + if (rc) { CERROR("Out of memory\n"); goto out; } list_add_tail(&grp->grp_link, - &console_session.ses_grp_list); + &console_session.ses_grp_list); lstcon_group_addref(grp); } @@ -1935,31 +1955,31 @@ lstcon_acceptor_handle(struct srpc_server_rpc *rpc) } rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 0); - if (rc == 0) { + if (!rc) { jrep->join_status = EEXIST; goto out; } rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 1); - if (rc != 0) { + if (rc) { CERROR("Out of memory\n"); goto out; } - ndl->ndl_node->nd_state = LST_NODE_ACTIVE; + ndl->ndl_node->nd_state = LST_NODE_ACTIVE; ndl->ndl_node->nd_timeout = console_session.ses_timeout; - if (grp->grp_userland == 0) + if (!grp->grp_userland) grp->grp_userland = 1; strlcpy(jrep->join_session, console_session.ses_name, sizeof(jrep->join_session)); jrep->join_timeout = console_session.ses_timeout; - jrep->join_status = 0; + jrep->join_status = 0; out: rep->msg_ses_feats = console_session.ses_features; - if (grp != NULL) + if (grp) lstcon_group_decref(grp); mutex_unlock(&console_session.ses_mutex); @@ -1967,17 +1987,17 @@ out: return rc; } -srpc_service_t lstcon_acceptor_service; +static srpc_service_t lstcon_acceptor_service; static void 
lstcon_init_acceptor_service(void) { /* initialize selftest console acceptor service table */ - lstcon_acceptor_service.sv_name = "join session"; - lstcon_acceptor_service.sv_handler = lstcon_acceptor_handle; - lstcon_acceptor_service.sv_id = SRPC_SERVICE_JOIN; + lstcon_acceptor_service.sv_name = "join session"; + lstcon_acceptor_service.sv_handler = lstcon_acceptor_handle; + lstcon_acceptor_service.sv_id = SRPC_SERVICE_JOIN; lstcon_acceptor_service.sv_wi_total = SFW_FRWK_WI_MAX; } -extern int lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_data *data); +extern int lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_hdr *hdr); static DECLARE_IOCTL_HANDLER(lstcon_ioctl_handler, lstcon_ioctl_entry); @@ -1988,16 +2008,16 @@ lstcon_console_init(void) int i; int rc; - memset(&console_session, 0, sizeof(lstcon_session_t)); + memset(&console_session, 0, sizeof(struct lstcon_session)); - console_session.ses_id = LST_INVALID_SID; - console_session.ses_state = LST_SESSION_NONE; - console_session.ses_timeout = 0; - console_session.ses_force = 0; - console_session.ses_expired = 0; + console_session.ses_id = LST_INVALID_SID; + console_session.ses_state = LST_SESSION_NONE; + console_session.ses_timeout = 0; + console_session.ses_force = 0; + console_session.ses_expired = 0; console_session.ses_feats_updated = 0; - console_session.ses_features = LST_FEATS_MASK; - console_session.ses_laststamp = ktime_get_real_seconds(); + console_session.ses_features = LST_FEATS_MASK; + console_session.ses_laststamp = ktime_get_real_seconds(); mutex_init(&console_session.ses_mutex); @@ -2008,7 +2028,7 @@ lstcon_console_init(void) LIBCFS_ALLOC(console_session.ses_ndl_hash, sizeof(struct list_head) * LST_GLOBAL_HASHSIZE); - if (console_session.ses_ndl_hash == NULL) + if (!console_session.ses_ndl_hash) return -ENOMEM; for (i = 0; i < LST_GLOBAL_HASHSIZE; i++) @@ -2019,7 +2039,7 @@ lstcon_console_init(void) rc = srpc_add_service(&lstcon_acceptor_service); LASSERT(rc != -EBUSY); - if (rc != 0) { + if (rc) { LIBCFS_FREE(console_session.ses_ndl_hash, sizeof(struct list_head) * LST_GLOBAL_HASHSIZE); return rc; @@ -2027,14 +2047,14 @@ lstcon_console_init(void) rc = srpc_service_add_buffers(&lstcon_acceptor_service, lstcon_acceptor_service.sv_wi_total); - if (rc != 0) { + if (rc) { rc = -ENOMEM; goto out; } rc = libcfs_register_ioctl(&lstcon_ioctl_handler); - if (rc == 0) { + if (!rc) { lstcon_rpc_module_init(); return 0; } @@ -2075,9 +2095,8 @@ lstcon_console_fini(void) LASSERT(list_empty(&console_session.ses_bat_list)); LASSERT(list_empty(&console_session.ses_trans_list)); - for (i = 0; i < LST_NODE_HASHSIZE; i++) { + for (i = 0; i < LST_NODE_HASHSIZE; i++) LASSERT(list_empty(&console_session.ses_ndl_hash[i])); - } LIBCFS_FREE(console_session.ses_ndl_hash, sizeof(struct list_head) * LST_GLOBAL_HASHSIZE); diff --git a/drivers/staging/lustre/lnet/selftest/console.h b/drivers/staging/lustre/lnet/selftest/console.h index 3f3286c0c..554f58244 100644 --- a/drivers/staging/lustre/lnet/selftest/console.h +++ b/drivers/staging/lustre/lnet/selftest/console.h @@ -52,79 +52,79 @@ typedef struct lstcon_node { lnet_process_id_t nd_id; /* id of the node */ - int nd_ref; /* reference count */ - int nd_state; /* state of the node */ - int nd_timeout; /* session timeout */ - unsigned long nd_stamp; /* timestamp of last replied RPC */ + int nd_ref; /* reference count */ + int nd_state; /* state of the node */ + int nd_timeout; /* session timeout */ + unsigned long nd_stamp; /* timestamp of last replied RPC */ struct lstcon_rpc 
nd_ping; /* ping rpc */ } lstcon_node_t; /* node descriptor */ typedef struct { struct list_head ndl_link; /* chain on list */ struct list_head ndl_hlink; /* chain on hash */ - lstcon_node_t *ndl_node; /* pointer to node */ + lstcon_node_t *ndl_node; /* pointer to node */ } lstcon_ndlink_t; /* node link descriptor */ typedef struct { - struct list_head grp_link; /* chain on global group list + struct list_head grp_link; /* chain on global group list */ - int grp_ref; /* reference count */ - int grp_userland; /* has userland nodes */ - int grp_nnode; /* # of nodes */ - char grp_name[LST_NAME_SIZE]; /* group name */ - - struct list_head grp_trans_list; /* transaction list */ - struct list_head grp_ndl_list; /* nodes list */ - struct list_head grp_ndl_hash[0]; /* hash table for nodes */ + int grp_ref; /* reference count */ + int grp_userland; /* has userland nodes */ + int grp_nnode; /* # of nodes */ + char grp_name[LST_NAME_SIZE]; /* group name */ + + struct list_head grp_trans_list; /* transaction list */ + struct list_head grp_ndl_list; /* nodes list */ + struct list_head grp_ndl_hash[0]; /* hash table for nodes */ } lstcon_group_t; /* (alias of nodes) group descriptor */ -#define LST_BATCH_IDLE 0xB0 /* idle batch */ +#define LST_BATCH_IDLE 0xB0 /* idle batch */ #define LST_BATCH_RUNNING 0xB1 /* running batch */ typedef struct lstcon_tsb_hdr { - lst_bid_t tsb_id; /* batch ID */ - int tsb_index; /* test index */ + lst_bid_t tsb_id; /* batch ID */ + int tsb_index; /* test index */ } lstcon_tsb_hdr_t; typedef struct { - lstcon_tsb_hdr_t bat_hdr; /* test_batch header */ - struct list_head bat_link; /* chain on session's batches list */ - int bat_ntest; /* # of test */ - int bat_state; /* state of the batch */ - int bat_arg; /* parameter for run|stop, timeout + lstcon_tsb_hdr_t bat_hdr; /* test_batch header */ + struct list_head bat_link; /* chain on session's batches list */ + int bat_ntest; /* # of tests */ + int bat_state; /* state of the batch */ + int bat_arg; /* parameter for run|stop, timeout * for run, force for stop */ - char bat_name[LST_NAME_SIZE];/* name of batch */ + char bat_name[LST_NAME_SIZE];/* name of batch */ struct list_head bat_test_list; /* list head of tests (lstcon_test_t) */ struct list_head bat_trans_list; /* list head of transaction */ - struct list_head bat_cli_list; /* list head of client nodes + struct list_head bat_cli_list; /* list head of client nodes * (lstcon_node_t) */ struct list_head *bat_cli_hash; /* hash table of client nodes */ - struct list_head bat_srv_list; /* list head of server nodes */ + struct list_head bat_srv_list; /* list head of server nodes */ struct list_head *bat_srv_hash; /* hash table of server nodes */ } lstcon_batch_t; /* (tests) batch descriptor */ typedef struct lstcon_test { - lstcon_tsb_hdr_t tes_hdr; /* test batch header */ - struct list_head tes_link; /* chain on batch's tests list */ - lstcon_batch_t *tes_batch; /* pointer to batch */ - - int tes_type; /* type of the test, i.e: bulk, ping */ - int tes_stop_onerr; /* stop on error */ - int tes_oneside; /* one-sided test */ - int tes_concur; /* concurrency */ - int tes_loop; /* loop count */ - int tes_dist; /* nodes distribution of target group */ - int tes_span; /* nodes span of target group */ - int tes_cliidx; /* client index, used for RPC creating */ + lstcon_tsb_hdr_t tes_hdr; /* test batch header */ + struct list_head tes_link; /* chain on batch's tests list */ + lstcon_batch_t *tes_batch; /* pointer to batch */ + + int tes_type; /* type of the test, i.e. bulk, ping */ + int 
tes_stop_onerr; /* stop on error */ + int tes_oneside; /* one-sided test */ + int tes_concur; /* concurrency */ + int tes_loop; /* loop count */ + int tes_dist; /* nodes distribution of target group */ + int tes_span; /* nodes span of target group */ + int tes_cliidx; /* client index, used for RPC creating */ struct list_head tes_trans_list; /* transaction list */ - lstcon_group_t *tes_src_grp; /* group run the test */ - lstcon_group_t *tes_dst_grp; /* target group */ + lstcon_group_t *tes_src_grp; /* group that runs the test */ + lstcon_group_t *tes_dst_grp; /* target group */ - int tes_paramlen; /* test parameter length */ - char tes_param[0]; /* test parameter */ + int tes_paramlen; /* test parameter length */ + char tes_param[0]; /* test parameter */ } lstcon_test_t; /* a single test descriptor */ #define LST_GLOBAL_HASHSIZE 503 /* global nodes hash table size */ @@ -135,25 +135,25 @@ typedef struct lstcon_test { #define LST_CONSOLE_TIMEOUT 300 /* default console timeout */ -typedef struct { - struct mutex ses_mutex; /* only 1 thread in session */ - lst_sid_t ses_id; /* global session id */ - int ses_key; /* local session key */ - int ses_state; /* state of session */ - int ses_timeout; /* timeout in seconds */ - time64_t ses_laststamp; /* last operation stamp (seconds) +struct lstcon_session { + struct mutex ses_mutex; /* only 1 thread in session */ + lst_sid_t ses_id; /* global session id */ + int ses_key; /* local session key */ + int ses_state; /* state of session */ + int ses_timeout; /* timeout in seconds */ + time64_t ses_laststamp; /* last operation stamp (seconds) */ - unsigned ses_features; /* tests features of the session + unsigned ses_features; /* tests features of the session */ - unsigned ses_feats_updated:1; /* features are synced with + unsigned ses_feats_updated:1; /* features are synced with * remote test nodes */ - unsigned ses_force:1; /* force creating */ - unsigned ses_shutdown:1; /* session is shutting down */ - unsigned ses_expired:1; /* console is timedout */ - __u64 ses_id_cookie; /* batch id cookie */ - char ses_name[LST_NAME_SIZE];/* session name */ - lstcon_rpc_trans_t *ses_ping; /* session pinger */ - stt_timer_t ses_ping_timer; /* timer for pinger */ + unsigned ses_force:1; /* force creating */ + unsigned ses_shutdown:1; /* session is shutting down */ + unsigned ses_expired:1; /* console has timed out */ + __u64 ses_id_cookie; /* batch id cookie */ + char ses_name[LST_NAME_SIZE];/* session name */ + lstcon_rpc_trans_t *ses_ping; /* session pinger */ + struct stt_timer ses_ping_timer; /* timer for pinger */ lstcon_trans_stat_t ses_trans_stat; /* transaction stats */ struct list_head ses_trans_list; /* global list of transaction */ @@ -162,12 +162,12 @@ typedef struct { struct list_head ses_ndl_list; /* global list of nodes */ struct list_head *ses_ndl_hash; /* hash table of nodes */ - spinlock_t ses_rpc_lock; /* serialize */ - atomic_t ses_rpc_counter; /* # of initialized RPCs */ + spinlock_t ses_rpc_lock; /* serialize */ + atomic_t ses_rpc_counter; /* # of initialized RPCs */ struct list_head ses_rpc_freelist; /* idle console rpc */ -} lstcon_session_t; /* session descriptor */ +}; /* session descriptor */ -extern lstcon_session_t console_session; +extern struct lstcon_session console_session; static inline lstcon_trans_stat_t * lstcon_trans_stat(void) @@ -176,7 +176,7 @@ lstcon_trans_stat(void) { return &console_session.ses_trans_stat; } static inline struct list_head * -lstcon_id2hash (lnet_process_id_t id, struct list_head *hash) +lstcon_id2hash(lnet_process_id_t id, struct list_head *hash) { 
unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE; @@ -184,51 +184,54 @@ lstcon_id2hash (lnet_process_id_t id, struct list_head *hash) } int lstcon_console_init(void); -int lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_data *data); int lstcon_console_fini(void); int lstcon_session_match(lst_sid_t sid); int lstcon_session_new(char *name, int key, unsigned version, - int timeout, int flags, lst_sid_t *sid_up); -int lstcon_session_info(lst_sid_t *sid_up, int *key, unsigned *verp, - lstcon_ndlist_ent_t *entp, char *name_up, int len); + int timeout, int flags, lst_sid_t __user *sid_up); +int lstcon_session_info(lst_sid_t __user *sid_up, int __user *key, + unsigned __user *verp, lstcon_ndlist_ent_t __user *entp, + char __user *name_up, int len); int lstcon_session_end(void); -int lstcon_session_debug(int timeout, struct list_head *result_up); +int lstcon_session_debug(int timeout, struct list_head __user *result_up); int lstcon_session_feats_check(unsigned feats); int lstcon_batch_debug(int timeout, char *name, - int client, struct list_head *result_up); + int client, struct list_head __user *result_up); int lstcon_group_debug(int timeout, char *name, - struct list_head *result_up); -int lstcon_nodes_debug(int timeout, int nnd, lnet_process_id_t *nds_up, - struct list_head *result_up); + struct list_head __user *result_up); +int lstcon_nodes_debug(int timeout, int nnd, lnet_process_id_t __user *nds_up, + struct list_head __user *result_up); int lstcon_group_add(char *name); int lstcon_group_del(char *name); int lstcon_group_clean(char *name, int args); -int lstcon_group_refresh(char *name, struct list_head *result_up); -int lstcon_nodes_add(char *name, int nnd, lnet_process_id_t *nds_up, - unsigned *featp, struct list_head *result_up); -int lstcon_nodes_remove(char *name, int nnd, lnet_process_id_t *nds_up, - struct list_head *result_up); -int lstcon_group_info(char *name, lstcon_ndlist_ent_t *gent_up, - int *index_p, int *ndent_p, lstcon_node_ent_t *ndents_up); -int lstcon_group_list(int idx, int len, char *name_up); +int lstcon_group_refresh(char *name, struct list_head __user *result_up); +int lstcon_nodes_add(char *name, int nnd, lnet_process_id_t __user *nds_up, + unsigned *featp, struct list_head __user *result_up); +int lstcon_nodes_remove(char *name, int nnd, lnet_process_id_t __user *nds_up, + struct list_head __user *result_up); +int lstcon_group_info(char *name, lstcon_ndlist_ent_t __user *gent_up, + int *index_p, int *ndent_p, + lstcon_node_ent_t __user *ndents_up); +int lstcon_group_list(int idx, int len, char __user *name_up); int lstcon_batch_add(char *name); -int lstcon_batch_run(char *name, int timeout, struct list_head *result_up); -int lstcon_batch_stop(char *name, int force, struct list_head *result_up); +int lstcon_batch_run(char *name, int timeout, + struct list_head __user *result_up); +int lstcon_batch_stop(char *name, int force, + struct list_head __user *result_up); int lstcon_test_batch_query(char *name, int testidx, int client, int timeout, - struct list_head *result_up); + struct list_head __user *result_up); int lstcon_batch_del(char *name); -int lstcon_batch_list(int idx, int namelen, char *name_up); -int lstcon_batch_info(char *name, lstcon_test_batch_ent_t *ent_up, +int lstcon_batch_list(int idx, int namelen, char __user *name_up); +int lstcon_batch_info(char *name, lstcon_test_batch_ent_t __user *ent_up, int server, int testidx, int *index_p, - int *ndent_p, lstcon_node_ent_t *dents_up); + int *ndent_p, lstcon_node_ent_t __user *dents_up); 
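The prototype changes above are annotation-only: parameters that point into userspace are now tagged __user, so sparse can verify that the console code touches them only through the uaccess helpers. A minimal sketch of the pattern these prototypes encode, assuming the standard copy_to_user() contract; struct example_ent and example_copy_out are made-up names, not part of the console API:

#include <linux/uaccess.h>

struct example_ent {
	int ent_status;			/* stand-in for a real result record */
};

/* Copy one result record out to a __user destination. */
static int example_copy_out(struct example_ent __user *ent_up,
			    const struct example_ent *ent)
{
	/* copy_to_user() returns the number of bytes left uncopied */
	return copy_to_user(ent_up, ent, sizeof(*ent)) ? -EFAULT : 0;
}

Dereferencing ent_up directly would still compile, but sparse would flag the address-space mismatch, which is exactly what the annotations make checkable.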
int lstcon_group_stat(char *grp_name, int timeout, - struct list_head *result_up); -int lstcon_nodes_stat(int count, lnet_process_id_t *ids_up, - int timeout, struct list_head *result_up); + struct list_head __user *result_up); +int lstcon_nodes_stat(int count, lnet_process_id_t __user *ids_up, + int timeout, struct list_head __user *result_up); int lstcon_test_add(char *batch_name, int type, int loop, int concur, int dist, int span, char *src_name, char *dst_name, void *param, int paramlen, int *retp, - struct list_head *result_up); + struct list_head __user *result_up); #endif diff --git a/drivers/staging/lustre/lnet/selftest/framework.c b/drivers/staging/lustre/lnet/selftest/framework.c index 1a2da7430..e2c532399 100644 --- a/drivers/staging/lustre/lnet/selftest/framework.c +++ b/drivers/staging/lustre/lnet/selftest/framework.c @@ -53,64 +53,64 @@ static int rpc_timeout = 64; module_param(rpc_timeout, int, 0644); MODULE_PARM_DESC(rpc_timeout, "rpc timeout in seconds (64 by default, 0 == never)"); -#define sfw_unpack_id(id) \ -do { \ - __swab64s(&(id).nid); \ - __swab32s(&(id).pid); \ +#define sfw_unpack_id(id) \ +do { \ + __swab64s(&(id).nid); \ + __swab32s(&(id).pid); \ } while (0) -#define sfw_unpack_sid(sid) \ -do { \ - __swab64s(&(sid).ses_nid); \ - __swab64s(&(sid).ses_stamp); \ +#define sfw_unpack_sid(sid) \ +do { \ + __swab64s(&(sid).ses_nid); \ + __swab64s(&(sid).ses_stamp); \ } while (0) -#define sfw_unpack_fw_counters(fc) \ -do { \ - __swab32s(&(fc).running_ms); \ +#define sfw_unpack_fw_counters(fc) \ +do { \ + __swab32s(&(fc).running_ms); \ __swab32s(&(fc).active_batches); \ __swab32s(&(fc).zombie_sessions); \ - __swab32s(&(fc).brw_errors); \ - __swab32s(&(fc).ping_errors); \ + __swab32s(&(fc).brw_errors); \ + __swab32s(&(fc).ping_errors); \ } while (0) -#define sfw_unpack_rpc_counters(rc) \ -do { \ +#define sfw_unpack_rpc_counters(rc) \ +do { \ __swab32s(&(rc).errors); \ - __swab32s(&(rc).rpcs_sent); \ - __swab32s(&(rc).rpcs_rcvd); \ - __swab32s(&(rc).rpcs_dropped); \ - __swab32s(&(rc).rpcs_expired); \ - __swab64s(&(rc).bulk_get); \ - __swab64s(&(rc).bulk_put); \ + __swab32s(&(rc).rpcs_sent); \ + __swab32s(&(rc).rpcs_rcvd); \ + __swab32s(&(rc).rpcs_dropped); \ + __swab32s(&(rc).rpcs_expired); \ + __swab64s(&(rc).bulk_get); \ + __swab64s(&(rc).bulk_put); \ } while (0) -#define sfw_unpack_lnet_counters(lc) \ -do { \ +#define sfw_unpack_lnet_counters(lc) \ +do { \ __swab32s(&(lc).errors); \ - __swab32s(&(lc).msgs_max); \ - __swab32s(&(lc).msgs_alloc); \ - __swab32s(&(lc).send_count); \ - __swab32s(&(lc).recv_count); \ - __swab32s(&(lc).drop_count); \ - __swab32s(&(lc).route_count); \ - __swab64s(&(lc).send_length); \ - __swab64s(&(lc).recv_length); \ - __swab64s(&(lc).drop_length); \ - __swab64s(&(lc).route_length); \ + __swab32s(&(lc).msgs_max); \ + __swab32s(&(lc).msgs_alloc); \ + __swab32s(&(lc).send_count); \ + __swab32s(&(lc).recv_count); \ + __swab32s(&(lc).drop_count); \ + __swab32s(&(lc).route_count); \ + __swab64s(&(lc).send_length); \ + __swab64s(&(lc).recv_length); \ + __swab64s(&(lc).drop_length); \ + __swab64s(&(lc).route_length); \ } while (0) -#define sfw_test_active(t) (atomic_read(&(t)->tsi_nactive) != 0) -#define sfw_batch_active(b) (atomic_read(&(b)->bat_nactive) != 0) +#define sfw_test_active(t) (atomic_read(&(t)->tsi_nactive)) +#define sfw_batch_active(b) (atomic_read(&(b)->bat_nactive)) static struct smoketest_framework { struct list_head fw_zombie_rpcs; /* RPCs to be recycled */ struct list_head fw_zombie_sessions; /* stopping sessions */ - struct 
list_head fw_tests; /* registered test cases */ - atomic_t fw_nzombies; /* # zombie sessions */ - spinlock_t fw_lock; /* serialise */ - sfw_session_t *fw_session; /* _the_ session */ - int fw_shuttingdown; /* shutdown in progress */ + struct list_head fw_tests; /* registered test cases */ + atomic_t fw_nzombies; /* # zombie sessions */ + spinlock_t fw_lock; /* serialise */ + sfw_session_t *fw_session; /* _the_ session */ + int fw_shuttingdown; /* shutdown in progress */ struct srpc_server_rpc *fw_active_srpc;/* running RPC */ } sfw_data; @@ -139,17 +139,17 @@ sfw_register_test(srpc_service_t *service, sfw_test_client_ops_t *cliops) { sfw_test_case_t *tsc; - if (sfw_find_test_case(service->sv_id) != NULL) { + if (sfw_find_test_case(service->sv_id)) { CERROR("Failed to register test %s (%d)\n", - service->sv_name, service->sv_id); + service->sv_name, service->sv_id); return -EEXIST; } LIBCFS_ALLOC(tsc, sizeof(sfw_test_case_t)); - if (tsc == NULL) + if (!tsc) return -ENOMEM; - tsc->tsc_cli_ops = cliops; + tsc->tsc_cli_ops = cliops; tsc->tsc_srv_service = service; list_add_tail(&tsc->tsc_list, &sfw_data.fw_tests); @@ -160,11 +160,11 @@ static void sfw_add_session_timer(void) { sfw_session_t *sn = sfw_data.fw_session; - stt_timer_t *timer = &sn->sn_timer; + struct stt_timer *timer = &sn->sn_timer; LASSERT(!sfw_data.fw_shuttingdown); - if (sn == NULL || sn->sn_timeout == 0) + if (!sn || !sn->sn_timeout) return; LASSERT(!sn->sn_timer_active); @@ -172,7 +172,6 @@ sfw_add_session_timer(void) sn->sn_timer_active = 1; timer->stt_expires = ktime_get_real_seconds() + sn->sn_timeout; stt_add_timer(timer); - return; } static int @@ -180,10 +179,10 @@ sfw_del_session_timer(void) { sfw_session_t *sn = sfw_data.fw_session; - if (sn == NULL || !sn->sn_timer_active) + if (!sn || !sn->sn_timer_active) return 0; - LASSERT(sn->sn_timeout != 0); + LASSERT(sn->sn_timeout); if (stt_del_timer(&sn->sn_timer)) { /* timer defused */ sn->sn_timer_active = 0; @@ -195,14 +194,14 @@ sfw_del_session_timer(void) static void sfw_deactivate_session(void) - __must_hold(&sfw_data.fw_lock) +__must_hold(&sfw_data.fw_lock) { sfw_session_t *sn = sfw_data.fw_session; int nactive = 0; sfw_batch_t *tsb; sfw_test_case_t *tsc; - if (sn == NULL) + if (!sn) return; LASSERT(!sn->sn_timer_active); @@ -226,7 +225,7 @@ sfw_deactivate_session(void) } } - if (nactive != 0) + if (nactive) return; /* wait for active batches to stop */ list_del_init(&sn->sn_list); @@ -248,8 +247,8 @@ sfw_session_expired(void *data) LASSERT(sn == sfw_data.fw_session); CWARN("Session expired! 
sid: %s-%llu, name: %s\n", - libcfs_nid2str(sn->sn_id.ses_nid), - sn->sn_id.ses_stamp, &sn->sn_name[0]); + libcfs_nid2str(sn->sn_id.ses_nid), + sn->sn_id.ses_stamp, &sn->sn_name[0]); sn->sn_timer_active = 0; sfw_deactivate_session(); @@ -261,7 +260,7 @@ static inline void sfw_init_session(sfw_session_t *sn, lst_sid_t sid, unsigned features, const char *name) { - stt_timer_t *timer = &sn->sn_timer; + struct stt_timer *timer = &sn->sn_timer; memset(sn, 0, sizeof(sfw_session_t)); INIT_LIST_HEAD(&sn->sn_list); @@ -272,10 +271,10 @@ sfw_init_session(sfw_session_t *sn, lst_sid_t sid, strlcpy(&sn->sn_name[0], name, sizeof(sn->sn_name)); sn->sn_timer_active = 0; - sn->sn_id = sid; - sn->sn_features = features; - sn->sn_timeout = session_timeout; - sn->sn_started = cfs_time_current(); + sn->sn_id = sid; + sn->sn_features = features; + sn->sn_timeout = session_timeout; + sn->sn_started = cfs_time_current(); timer->stt_data = sn; timer->stt_func = sfw_session_expired; @@ -289,29 +288,26 @@ sfw_server_rpc_done(struct srpc_server_rpc *rpc) struct srpc_service *sv = rpc->srpc_scd->scd_svc; int status = rpc->srpc_status; - CDEBUG(D_NET, - "Incoming framework RPC done: service %s, peer %s, status %s:%d\n", - sv->sv_name, libcfs_id2str(rpc->srpc_peer), - swi_state2str(rpc->srpc_wi.swi_state), - status); + CDEBUG(D_NET, "Incoming framework RPC done: service %s, peer %s, status %s:%d\n", + sv->sv_name, libcfs_id2str(rpc->srpc_peer), + swi_state2str(rpc->srpc_wi.swi_state), + status); - if (rpc->srpc_bulk != NULL) + if (rpc->srpc_bulk) sfw_free_pages(rpc); - return; } static void sfw_client_rpc_fini(srpc_client_rpc_t *rpc) { - LASSERT(rpc->crpc_bulk.bk_niov == 0); + LASSERT(!rpc->crpc_bulk.bk_niov); LASSERT(list_empty(&rpc->crpc_list)); - LASSERT(atomic_read(&rpc->crpc_refcount) == 0); + LASSERT(!atomic_read(&rpc->crpc_refcount)); - CDEBUG(D_NET, - "Outgoing framework RPC done: service %d, peer %s, status %s:%d:%d\n", - rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), - swi_state2str(rpc->crpc_wi.swi_state), - rpc->crpc_aborted, rpc->crpc_status); + CDEBUG(D_NET, "Outgoing framework RPC done: service %d, peer %s, status %s:%d:%d\n", + rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), + swi_state2str(rpc->crpc_wi.swi_state), + rpc->crpc_aborted, rpc->crpc_status); spin_lock(&sfw_data.fw_lock); @@ -328,7 +324,7 @@ sfw_find_batch(lst_bid_t bid) sfw_session_t *sn = sfw_data.fw_session; sfw_batch_t *bat; - LASSERT(sn != NULL); + LASSERT(sn); list_for_each_entry(bat, &sn->sn_batches, bat_list) { if (bat->bat_id.bat_id == bid.bat_id) @@ -344,19 +340,19 @@ sfw_bid2batch(lst_bid_t bid) sfw_session_t *sn = sfw_data.fw_session; sfw_batch_t *bat; - LASSERT(sn != NULL); + LASSERT(sn); bat = sfw_find_batch(bid); - if (bat != NULL) + if (bat) return bat; LIBCFS_ALLOC(bat, sizeof(sfw_batch_t)); - if (bat == NULL) + if (!bat) return NULL; - bat->bat_error = 0; - bat->bat_session = sn; - bat->bat_id = bid; + bat->bat_error = 0; + bat->bat_session = sn; + bat->bat_id = bid; atomic_set(&bat->bat_nactive, 0); INIT_LIST_HEAD(&bat->bat_tests); @@ -371,14 +367,14 @@ sfw_get_stats(srpc_stat_reqst_t *request, srpc_stat_reply_t *reply) sfw_counters_t *cnt = &reply->str_fw; sfw_batch_t *bat; - reply->str_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id; + reply->str_sid = !sn ? 
LST_INVALID_SID : sn->sn_id; if (request->str_sid.ses_nid == LNET_NID_ANY) { reply->str_status = EINVAL; return 0; } - if (sn == NULL || !sfw_sid_equal(request->str_sid, sn->sn_id)) { + if (!sn || !sfw_sid_equal(request->str_sid, sn->sn_id)) { reply->str_status = ESRCH; return 0; } @@ -386,11 +382,13 @@ sfw_get_stats(srpc_stat_reqst_t *request, srpc_stat_reply_t *reply) lnet_counters_get(&reply->str_lnet); srpc_get_counters(&reply->str_rpc); - /* send over the msecs since the session was started - - with 32 bits to send, this is ~49 days */ - cnt->running_ms = jiffies_to_msecs(jiffies - sn->sn_started); - cnt->brw_errors = atomic_read(&sn->sn_brw_errors); - cnt->ping_errors = atomic_read(&sn->sn_ping_errors); + /* + * send over the msecs since the session was started + * with 32 bits to send, this is ~49 days + */ + cnt->running_ms = jiffies_to_msecs(jiffies - sn->sn_started); + cnt->brw_errors = atomic_read(&sn->sn_brw_errors); + cnt->ping_errors = atomic_read(&sn->sn_ping_errors); cnt->zombie_sessions = atomic_read(&sfw_data.fw_nzombies); cnt->active_batches = 0; @@ -408,18 +406,18 @@ sfw_make_session(srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply) { sfw_session_t *sn = sfw_data.fw_session; srpc_msg_t *msg = container_of(request, srpc_msg_t, - msg_body.mksn_reqst); + msg_body.mksn_reqst); int cplen = 0; if (request->mksn_sid.ses_nid == LNET_NID_ANY) { - reply->mksn_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id; + reply->mksn_sid = !sn ? LST_INVALID_SID : sn->sn_id; reply->mksn_status = EINVAL; return 0; } - if (sn != NULL) { - reply->mksn_status = 0; - reply->mksn_sid = sn->sn_id; + if (sn) { + reply->mksn_status = 0; + reply->mksn_sid = sn->sn_id; reply->mksn_timeout = sn->sn_timeout; if (sfw_sid_equal(request->mksn_sid, sn->sn_id)) { @@ -437,21 +435,23 @@ sfw_make_session(srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply) } } - /* reject the request if it requires unknown features + /* + * reject the request if it requires unknown features * NB: old version will always accept all features because it's not * aware of srpc_msg_t::msg_ses_feats, it's a defect but it's also * harmless because it will return zero feature to console, and it's * console's responsibility to make sure all nodes in a session have - * same feature mask. */ - if ((msg->msg_ses_feats & ~LST_FEATS_MASK) != 0) { + * same feature mask. + */ + if (msg->msg_ses_feats & ~LST_FEATS_MASK) { reply->mksn_status = EPROTO; return 0; } /* brand new or create by force */ LIBCFS_ALLOC(sn, sizeof(sfw_session_t)); - if (sn == NULL) { - CERROR("Dropping RPC (mksn) under memory pressure.\n"); + if (!sn) { + CERROR("dropping RPC mksn under memory pressure\n"); return -ENOMEM; } @@ -461,13 +461,13 @@ sfw_make_session(srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply) spin_lock(&sfw_data.fw_lock); sfw_deactivate_session(); - LASSERT(sfw_data.fw_session == NULL); + LASSERT(!sfw_data.fw_session); sfw_data.fw_session = sn; spin_unlock(&sfw_data.fw_lock); - reply->mksn_status = 0; - reply->mksn_sid = sn->sn_id; + reply->mksn_status = 0; + reply->mksn_sid = sn->sn_id; reply->mksn_timeout = sn->sn_timeout; return 0; } @@ -477,15 +477,15 @@ sfw_remove_session(srpc_rmsn_reqst_t *request, srpc_rmsn_reply_t *reply) { sfw_session_t *sn = sfw_data.fw_session; - reply->rmsn_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id; + reply->rmsn_sid = !sn ? 
LST_INVALID_SID : sn->sn_id; if (request->rmsn_sid.ses_nid == LNET_NID_ANY) { reply->rmsn_status = EINVAL; return 0; } - if (sn == NULL || !sfw_sid_equal(request->rmsn_sid, sn->sn_id)) { - reply->rmsn_status = (sn == NULL) ? ESRCH : EBUSY; + if (!sn || !sfw_sid_equal(request->rmsn_sid, sn->sn_id)) { + reply->rmsn_status = !sn ? ESRCH : EBUSY; return 0; } @@ -499,8 +499,8 @@ sfw_remove_session(srpc_rmsn_reqst_t *request, srpc_rmsn_reply_t *reply) spin_unlock(&sfw_data.fw_lock); reply->rmsn_status = 0; - reply->rmsn_sid = LST_INVALID_SID; - LASSERT(sfw_data.fw_session == NULL); + reply->rmsn_sid = LST_INVALID_SID; + LASSERT(!sfw_data.fw_session); return 0; } @@ -509,14 +509,14 @@ sfw_debug_session(srpc_debug_reqst_t *request, srpc_debug_reply_t *reply) { sfw_session_t *sn = sfw_data.fw_session; - if (sn == NULL) { + if (!sn) { reply->dbg_status = ESRCH; - reply->dbg_sid = LST_INVALID_SID; + reply->dbg_sid = LST_INVALID_SID; return 0; } - reply->dbg_status = 0; - reply->dbg_sid = sn->sn_id; + reply->dbg_status = 0; + reply->dbg_sid = sn->sn_id; reply->dbg_timeout = sn->sn_timeout; if (strlcpy(reply->dbg_name, &sn->sn_name[0], sizeof(reply->dbg_name)) >= sizeof(reply->dbg_name)) @@ -539,10 +539,16 @@ sfw_test_rpc_fini(srpc_client_rpc_t *rpc) static inline int sfw_test_buffers(sfw_test_instance_t *tsi) { - struct sfw_test_case *tsc = sfw_find_test_case(tsi->tsi_service); - struct srpc_service *svc = tsc->tsc_srv_service; + struct sfw_test_case *tsc; + struct srpc_service *svc; int nbuf; + LASSERT(tsi); + tsc = sfw_find_test_case(tsi->tsi_service); + LASSERT(tsc); + svc = tsc->tsc_srv_service; + LASSERT(svc); + nbuf = min(svc->sv_wi_total, tsi->tsi_loop) / svc->sv_ncpts; return max(SFW_TEST_WI_MIN, nbuf + SFW_TEST_WI_EXTRA); } @@ -555,10 +561,10 @@ sfw_load_test(struct sfw_test_instance *tsi) int nbuf; int rc; - LASSERT(tsi != NULL); + LASSERT(tsi); tsc = sfw_find_test_case(tsi->tsi_service); nbuf = sfw_test_buffers(tsi); - LASSERT(tsc != NULL); + LASSERT(tsc); svc = tsc->tsc_srv_service; if (tsi->tsi_is_client) { @@ -567,39 +573,44 @@ sfw_load_test(struct sfw_test_instance *tsi) } rc = srpc_service_add_buffers(svc, nbuf); - if (rc != 0) { + if (rc) { CWARN("Failed to reserve enough buffers: service %s, %d needed: %d\n", svc->sv_name, nbuf, rc); - /* NB: this error handler is not strictly correct, because + /* + * NB: this error handler is not strictly correct, because * it may release more buffers than already allocated, * but it doesn't matter because request portal should - * be lazy portal and will grow buffers if necessary. */ + * be lazy portal and will grow buffers if necessary. + */ srpc_service_remove_buffers(svc, nbuf); return -ENOMEM; } CDEBUG(D_NET, "Reserved %d buffers for test %s\n", nbuf * (srpc_serv_is_framework(svc) ? - 1 : cfs_cpt_number(cfs_cpt_table)), svc->sv_name); + 2 : cfs_cpt_number(cfs_cpt_table)), svc->sv_name); return 0; } static void sfw_unload_test(struct sfw_test_instance *tsi) { - struct sfw_test_case *tsc = sfw_find_test_case(tsi->tsi_service); + struct sfw_test_case *tsc; - LASSERT(tsc != NULL); + LASSERT(tsi); + tsc = sfw_find_test_case(tsi->tsi_service); + LASSERT(tsc); if (tsi->tsi_is_client) return; - /* shrink buffers, because request portal is lazy portal + /* + * shrink buffers, because request portal is lazy portal * which can grow buffers at runtime so we may leave - * some buffers behind, but never mind... */ + * some buffers behind, but never mind... 
+ */ srpc_service_remove_buffers(tsc->tsc_srv_service, sfw_test_buffers(tsi)); - return; } static void @@ -619,14 +630,14 @@ sfw_destroy_test_instance(sfw_test_instance_t *tsi) while (!list_empty(&tsi->tsi_units)) { tsu = list_entry(tsi->tsi_units.next, - sfw_test_unit_t, tsu_list); + sfw_test_unit_t, tsu_list); list_del(&tsu->tsu_list); LIBCFS_FREE(tsu, sizeof(*tsu)); } while (!list_empty(&tsi->tsi_free_rpcs)) { rpc = list_entry(tsi->tsi_free_rpcs.next, - srpc_client_rpc_t, crpc_list); + srpc_client_rpc_t, crpc_list); list_del(&rpc->crpc_list); LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc)); } @@ -634,7 +645,6 @@ sfw_destroy_test_instance(sfw_test_instance_t *tsi) clean: sfw_unload_test(tsi); LIBCFS_FREE(tsi, sizeof(*tsi)); - return; } static void @@ -647,13 +657,12 @@ sfw_destroy_batch(sfw_batch_t *tsb) while (!list_empty(&tsb->bat_tests)) { tsi = list_entry(tsb->bat_tests.next, - sfw_test_instance_t, tsi_list); + sfw_test_instance_t, tsi_list); list_del_init(&tsi->tsi_list); sfw_destroy_test_instance(tsi); } LIBCFS_FREE(tsb, sizeof(sfw_batch_t)); - return; } void @@ -666,14 +675,13 @@ sfw_destroy_session(sfw_session_t *sn) while (!list_empty(&sn->sn_batches)) { batch = list_entry(sn->sn_batches.next, - sfw_batch_t, bat_list); + sfw_batch_t, bat_list); list_del_init(&batch->bat_list); sfw_destroy_batch(batch); } LIBCFS_FREE(sn, sizeof(*sn)); atomic_dec(&sfw_data.fw_nzombies); - return; } static void @@ -690,7 +698,7 @@ sfw_unpack_addtest_req(srpc_msg_t *msg) LASSERT(msg->msg_magic == __swab32(SRPC_MSG_MAGIC)); if (req->tsr_service == SRPC_SERVICE_BRW) { - if ((msg->msg_ses_feats & LST_FEAT_BULK_LEN) == 0) { + if (!(msg->msg_ses_feats & LST_FEAT_BULK_LEN)) { test_bulk_req_t *bulk = &req->tsr_u.bulk_v0; __swab32s(&bulk->blk_opc); @@ -718,7 +726,6 @@ sfw_unpack_addtest_req(srpc_msg_t *msg) } LBUG(); - return; } static int @@ -734,9 +741,9 @@ sfw_add_test_instance(sfw_batch_t *tsb, struct srpc_server_rpc *rpc) int rc; LIBCFS_ALLOC(tsi, sizeof(*tsi)); - if (tsi == NULL) { + if (!tsi) { CERROR("Can't allocate test instance for batch: %llu\n", - tsb->bat_id.bat_id); + tsb->bat_id.bat_id); return -ENOMEM; } @@ -746,16 +753,16 @@ sfw_add_test_instance(sfw_batch_t *tsb, struct srpc_server_rpc *rpc) INIT_LIST_HEAD(&tsi->tsi_free_rpcs); INIT_LIST_HEAD(&tsi->tsi_active_rpcs); - tsi->tsi_stopping = 0; - tsi->tsi_batch = tsb; - tsi->tsi_loop = req->tsr_loop; - tsi->tsi_concur = req->tsr_concur; - tsi->tsi_service = req->tsr_service; - tsi->tsi_is_client = !!(req->tsr_is_client); + tsi->tsi_stopping = 0; + tsi->tsi_batch = tsb; + tsi->tsi_loop = req->tsr_loop; + tsi->tsi_concur = req->tsr_concur; + tsi->tsi_service = req->tsr_service; + tsi->tsi_is_client = !!(req->tsr_is_client); tsi->tsi_stoptsu_onerr = !!(req->tsr_stop_onerr); rc = sfw_load_test(tsi); - if (rc != 0) { + if (rc) { LIBCFS_FREE(tsi, sizeof(*tsi)); return rc; } @@ -768,7 +775,7 @@ sfw_add_test_instance(sfw_batch_t *tsb, struct srpc_server_rpc *rpc) return 0; } - LASSERT(bk != NULL); + LASSERT(bk); LASSERT(bk->bk_niov * SFW_ID_PER_PAGE >= (unsigned int)ndest); LASSERT((unsigned int)bk->bk_len >= sizeof(lnet_process_id_packed_t) * ndest); @@ -782,36 +789,36 @@ sfw_add_test_instance(sfw_batch_t *tsb, struct srpc_server_rpc *rpc) int j; dests = page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].kiov_page); - LASSERT(dests != NULL); /* my pages are within KVM always */ + LASSERT(dests); /* my pages are within KVM always */ id = dests[i % SFW_ID_PER_PAGE]; if (msg->msg_magic != SRPC_MSG_MAGIC) sfw_unpack_id(id); for (j = 0; j < tsi->tsi_concur; j++) { 
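			/*
			 * One sfw_test_unit_t is allocated per
			 * (destination, concurrency slot) pair, so a
			 * client test with N destinations and tsi_concur
			 * of C schedules N * C workitems in total.
			 */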
LIBCFS_ALLOC(tsu, sizeof(sfw_test_unit_t)); - if (tsu == NULL) { + if (!tsu) { rc = -ENOMEM; CERROR("Can't allocate tsu for %d\n", - tsi->tsi_service); + tsi->tsi_service); goto error; } tsu->tsu_dest.nid = id.nid; tsu->tsu_dest.pid = id.pid; tsu->tsu_instance = tsi; - tsu->tsu_private = NULL; + tsu->tsu_private = NULL; list_add_tail(&tsu->tsu_list, &tsi->tsi_units); } } rc = tsi->tsi_ops->tso_init(tsi); - if (rc == 0) { + if (!rc) { list_add_tail(&tsi->tsi_list, &tsb->bat_tests); return 0; } error: - LASSERT(rc != 0); + LASSERT(rc); sfw_destroy_test_instance(tsi); return rc; } @@ -856,7 +863,6 @@ sfw_test_unit_done(sfw_test_unit_t *tsu) spin_unlock(&sfw_data.fw_lock); sfw_destroy_session(sn); - return; } static void @@ -876,9 +882,8 @@ sfw_test_rpc_done(srpc_client_rpc_t *rpc) list_del_init(&rpc->crpc_list); /* batch is stopping or loop is done or get error */ - if (tsi->tsi_stopping || - tsu->tsu_loop == 0 || - (rpc->crpc_status != 0 && tsi->tsi_stoptsu_onerr)) + if (tsi->tsi_stopping || !tsu->tsu_loop || + (rpc->crpc_status && tsi->tsi_stoptsu_onerr)) done = 1; /* dec ref for poster */ @@ -892,7 +897,6 @@ sfw_test_rpc_done(srpc_client_rpc_t *rpc) } sfw_test_unit_done(tsu); - return; } int @@ -906,18 +910,17 @@ sfw_create_test_rpc(sfw_test_unit_t *tsu, lnet_process_id_t peer, spin_lock(&tsi->tsi_lock); LASSERT(sfw_test_active(tsi)); - - if (!list_empty(&tsi->tsi_free_rpcs)) { /* pick request from buffer */ - rpc = list_entry(tsi->tsi_free_rpcs.next, - srpc_client_rpc_t, crpc_list); + rpc = list_first_entry_or_null(&tsi->tsi_free_rpcs, + srpc_client_rpc_t, crpc_list); + if (rpc) { LASSERT(nblk == rpc->crpc_bulk.bk_niov); list_del_init(&rpc->crpc_list); } spin_unlock(&tsi->tsi_lock); - if (rpc == NULL) { + if (!rpc) { rpc = srpc_create_client_rpc(peer, tsi->tsi_service, nblk, blklen, sfw_test_rpc_done, sfw_test_rpc_fini, tsu); @@ -927,7 +930,7 @@ sfw_create_test_rpc(sfw_test_unit_t *tsu, lnet_process_id_t peer, sfw_test_rpc_fini, tsu); } - if (rpc == NULL) { + if (!rpc) { CERROR("Can't create rpc for test %d\n", tsi->tsi_service); return -ENOMEM; } @@ -947,12 +950,12 @@ sfw_run_test(swi_workitem_t *wi) LASSERT(wi == &tsu->tsu_worker); - if (tsi->tsi_ops->tso_prep_rpc(tsu, tsu->tsu_dest, &rpc) != 0) { - LASSERT(rpc == NULL); + if (tsi->tsi_ops->tso_prep_rpc(tsu, tsu->tsu_dest, &rpc)) { + LASSERT(!rpc); goto test_done; } - LASSERT(rpc != NULL); + LASSERT(rpc); spin_lock(&tsi->tsi_lock); @@ -968,9 +971,8 @@ sfw_run_test(swi_workitem_t *wi) list_add_tail(&rpc->crpc_list, &tsi->tsi_active_rpcs); spin_unlock(&tsi->tsi_lock); - rpc->crpc_timeout = rpc_timeout; - spin_lock(&rpc->crpc_lock); + rpc->crpc_timeout = rpc_timeout; srpc_post_rpc(rpc); spin_unlock(&rpc->crpc_lock); return 0; @@ -1015,8 +1017,7 @@ sfw_run_batch(sfw_batch_t *tsb) tsu->tsu_loop = tsi->tsi_loop; wi = &tsu->tsu_worker; swi_init_workitem(wi, tsu, sfw_run_test, - lst_sched_test[\ - lnet_cpt_of_nid(tsu->tsu_dest.nid)]); + lst_sched_test[lnet_cpt_of_nid(tsu->tsu_dest.nid)]); swi_schedule_workitem(wi); } } @@ -1074,7 +1075,7 @@ sfw_query_batch(sfw_batch_t *tsb, int testidx, srpc_batch_reply_t *reply) if (testidx < 0) return -EINVAL; - if (testidx == 0) { + if (!testidx) { reply->bar_active = atomic_read(&tsb->bat_nactive); return 0; } @@ -1101,11 +1102,11 @@ int sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len, int sink) { - LASSERT(rpc->srpc_bulk == NULL); + LASSERT(!rpc->srpc_bulk); LASSERT(npages > 0 && npages <= LNET_MAX_IOV); rpc->srpc_bulk = srpc_alloc_bulk(cpt, npages, len, sink); - if 
(rpc->srpc_bulk == NULL) + if (!rpc->srpc_bulk) return -ENOMEM; return 0; @@ -1121,13 +1122,13 @@ sfw_add_test(struct srpc_server_rpc *rpc) sfw_batch_t *bat; request = &rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst; - reply->tsr_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id; + reply->tsr_sid = !sn ? LST_INVALID_SID : sn->sn_id; - if (request->tsr_loop == 0 || - request->tsr_concur == 0 || + if (!request->tsr_loop || + !request->tsr_concur || request->tsr_sid.ses_nid == LNET_NID_ANY || request->tsr_ndest > SFW_MAX_NDESTS || - (request->tsr_is_client && request->tsr_ndest == 0) || + (request->tsr_is_client && !request->tsr_ndest) || request->tsr_concur > SFW_MAX_CONCUR || request->tsr_service > SRPC_SERVICE_MAX_ID || request->tsr_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID) { @@ -1135,17 +1136,17 @@ sfw_add_test(struct srpc_server_rpc *rpc) return 0; } - if (sn == NULL || !sfw_sid_equal(request->tsr_sid, sn->sn_id) || - sfw_find_test_case(request->tsr_service) == NULL) { + if (!sn || !sfw_sid_equal(request->tsr_sid, sn->sn_id) || + !sfw_find_test_case(request->tsr_service)) { reply->tsr_status = ENOENT; return 0; } bat = sfw_bid2batch(request->tsr_bid); - if (bat == NULL) { - CERROR("Dropping RPC (%s) from %s under memory pressure.\n", - rpc->srpc_scd->scd_svc->sv_name, - libcfs_id2str(rpc->srpc_peer)); + if (!bat) { + CERROR("dropping RPC %s from %s under memory pressure\n", + rpc->srpc_scd->scd_svc->sv_name, + libcfs_id2str(rpc->srpc_peer)); return -ENOMEM; } @@ -1154,15 +1155,15 @@ sfw_add_test(struct srpc_server_rpc *rpc) return 0; } - if (request->tsr_is_client && rpc->srpc_bulk == NULL) { + if (request->tsr_is_client && !rpc->srpc_bulk) { /* rpc will be resumed later in sfw_bulk_ready */ int npg = sfw_id_pages(request->tsr_ndest); int len; - if ((sn->sn_features & LST_FEAT_BULK_LEN) == 0) { - len = npg * PAGE_CACHE_SIZE; + if (!(sn->sn_features & LST_FEAT_BULK_LEN)) { + len = npg * PAGE_SIZE; - } else { + } else { len = sizeof(lnet_process_id_packed_t) * request->tsr_ndest; } @@ -1171,11 +1172,11 @@ sfw_add_test(struct srpc_server_rpc *rpc) } rc = sfw_add_test_instance(bat, rpc); - CDEBUG(rc == 0 ? D_NET : D_WARNING, - "%s test: sv %d %s, loop %d, concur %d, ndest %d\n", - rc == 0 ? "Added" : "Failed to add", request->tsr_service, - request->tsr_is_client ? "client" : "server", - request->tsr_loop, request->tsr_concur, request->tsr_ndest); + CDEBUG(!rc ? D_NET : D_WARNING, + "%s test: sv %d %s, loop %d, concur %d, ndest %d\n", + !rc ? "Added" : "Failed to add", request->tsr_service, + request->tsr_is_client ? "client" : "server", + request->tsr_loop, request->tsr_concur, request->tsr_ndest); reply->tsr_status = (rc < 0) ? -rc : rc; return 0; @@ -1188,15 +1189,15 @@ sfw_control_batch(srpc_batch_reqst_t *request, srpc_batch_reply_t *reply) int rc = 0; sfw_batch_t *bat; - reply->bar_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id; + reply->bar_sid = !sn ? 
LST_INVALID_SID : sn->sn_id; - if (sn == NULL || !sfw_sid_equal(request->bar_sid, sn->sn_id)) { + if (!sn || !sfw_sid_equal(request->bar_sid, sn->sn_id)) { reply->bar_status = ESRCH; return 0; } bat = sfw_find_batch(request->bar_bid); - if (bat == NULL) { + if (!bat) { reply->bar_status = ENOENT; return 0; } @@ -1231,7 +1232,7 @@ sfw_handle_server_rpc(struct srpc_server_rpc *rpc) unsigned features = LST_FEATS_MASK; int rc = 0; - LASSERT(sfw_data.fw_active_srpc == NULL); + LASSERT(!sfw_data.fw_active_srpc); LASSERT(sv->sv_id <= SRPC_FRAMEWORK_SERVICE_MAX_ID); spin_lock(&sfw_data.fw_lock); @@ -1242,7 +1243,7 @@ sfw_handle_server_rpc(struct srpc_server_rpc *rpc) } /* Remove timer to avoid racing with it or expiring active session */ - if (sfw_del_session_timer() != 0) { + if (sfw_del_session_timer()) { CERROR("Dropping RPC (%s) from %s: racing with expiry timer.", sv->sv_name, libcfs_id2str(rpc->srpc_peer)); spin_unlock(&sfw_data.fw_lock); @@ -1262,19 +1263,21 @@ sfw_handle_server_rpc(struct srpc_server_rpc *rpc) sv->sv_id != SRPC_SERVICE_DEBUG) { sfw_session_t *sn = sfw_data.fw_session; - if (sn != NULL && + if (sn && sn->sn_features != request->msg_ses_feats) { CNETERR("Features of framework RPC don't match features of current session: %x/%x\n", request->msg_ses_feats, sn->sn_features); reply->msg_body.reply.status = EPROTO; - reply->msg_body.reply.sid = sn->sn_id; + reply->msg_body.reply.sid = sn->sn_id; goto out; } - } else if ((request->msg_ses_feats & ~LST_FEATS_MASK) != 0) { - /* NB: at this point, old version will ignore features and + } else if (request->msg_ses_feats & ~LST_FEATS_MASK) { + /** + * NB: at this point, old version will ignore features and * create new session anyway, so console should be able - * to handle this */ + * to handle this + */ reply->msg_body.reply.status = EPROTO; goto out; } @@ -1312,7 +1315,7 @@ sfw_handle_server_rpc(struct srpc_server_rpc *rpc) break; } - if (sfw_data.fw_session != NULL) + if (sfw_data.fw_session) features = sfw_data.fw_session->sn_features; out: reply->msg_ses_feats = features; @@ -1333,14 +1336,14 @@ sfw_bulk_ready(struct srpc_server_rpc *rpc, int status) struct srpc_service *sv = rpc->srpc_scd->scd_svc; int rc; - LASSERT(rpc->srpc_bulk != NULL); + LASSERT(rpc->srpc_bulk); LASSERT(sv->sv_id == SRPC_SERVICE_TEST); - LASSERT(sfw_data.fw_active_srpc == NULL); + LASSERT(!sfw_data.fw_active_srpc); LASSERT(rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst.tsr_is_client); spin_lock(&sfw_data.fw_lock); - if (status != 0) { + if (status) { CERROR("Bulk transfer failed for RPC: service %s, peer %s, status %d\n", sv->sv_name, libcfs_id2str(rpc->srpc_peer), status); spin_unlock(&sfw_data.fw_lock); @@ -1352,8 +1355,8 @@ sfw_bulk_ready(struct srpc_server_rpc *rpc, int status) return -ESHUTDOWN; } - if (sfw_del_session_timer() != 0) { - CERROR("Dropping RPC (%s) from %s: racing with expiry timer", + if (sfw_del_session_timer()) { + CERROR("dropping RPC %s from %s: racing with expiry timer\n", sv->sv_name, libcfs_id2str(rpc->srpc_peer)); spin_unlock(&sfw_data.fw_lock); return -EAGAIN; @@ -1386,9 +1389,9 @@ sfw_create_rpc(lnet_process_id_t peer, int service, LASSERT(!sfw_data.fw_shuttingdown); LASSERT(service <= SRPC_FRAMEWORK_SERVICE_MAX_ID); - if (nbulkiov == 0 && !list_empty(&sfw_data.fw_zombie_rpcs)) { + if (!nbulkiov && !list_empty(&sfw_data.fw_zombie_rpcs)) { rpc = list_entry(sfw_data.fw_zombie_rpcs.next, - srpc_client_rpc_t, crpc_list); + srpc_client_rpc_t, crpc_list); list_del(&rpc->crpc_list); srpc_init_client_rpc(rpc, peer, service, 0, 0, @@ 
-1397,15 +1400,15 @@ sfw_create_rpc(lnet_process_id_t peer, int service, spin_unlock(&sfw_data.fw_lock); - if (rpc == NULL) { + if (!rpc) { rpc = srpc_create_client_rpc(peer, service, nbulkiov, bulklen, done, - nbulkiov != 0 ? NULL : + nbulkiov ? NULL : sfw_client_rpc_fini, priv); } - if (rpc != NULL) /* "session" is concept in framework */ + if (rpc) /* "session" is concept in framework */ rpc->crpc_reqstmsg.msg_ses_feats = features; return rpc; @@ -1552,7 +1555,6 @@ sfw_unpack_message(srpc_msg_t *msg) } LBUG(); - return; } void @@ -1564,7 +1566,6 @@ sfw_abort_rpc(srpc_client_rpc_t *rpc) spin_lock(&rpc->crpc_lock); srpc_abort_rpc(rpc, -EINTR); spin_unlock(&rpc->crpc_lock); - return; } void @@ -1581,7 +1582,6 @@ sfw_post_rpc(srpc_client_rpc_t *rpc) srpc_post_rpc(rpc); spin_unlock(&rpc->crpc_lock); - return; } static srpc_service_t sfw_services[] = { @@ -1622,16 +1622,6 @@ static srpc_service_t sfw_services[] = { } }; -extern sfw_test_client_ops_t ping_test_client; -extern srpc_service_t ping_test_service; -extern void ping_init_test_client(void); -extern void ping_init_test_service(void); - -extern sfw_test_client_ops_t brw_test_client; -extern srpc_service_t brw_test_service; -extern void brw_init_test_client(void); -extern void brw_init_test_service(void); - int sfw_startup(void) { @@ -1643,25 +1633,25 @@ sfw_startup(void) if (session_timeout < 0) { CERROR("Session timeout must be non-negative: %d\n", - session_timeout); + session_timeout); return -EINVAL; } if (rpc_timeout < 0) { CERROR("RPC timeout must be non-negative: %d\n", - rpc_timeout); + rpc_timeout); return -EINVAL; } - if (session_timeout == 0) + if (!session_timeout) CWARN("Zero session_timeout specified - test sessions never expire.\n"); - if (rpc_timeout == 0) + if (!rpc_timeout) CWARN("Zero rpc_timeout specified - test RPC never expire.\n"); memset(&sfw_data, 0, sizeof(struct smoketest_framework)); - sfw_data.fw_session = NULL; + sfw_data.fw_session = NULL; sfw_data.fw_active_srpc = NULL; spin_lock_init(&sfw_data.fw_lock); atomic_set(&sfw_data.fw_nzombies, 0); @@ -1672,12 +1662,12 @@ sfw_startup(void) brw_init_test_client(); brw_init_test_service(); rc = sfw_register_test(&brw_test_service, &brw_test_client); - LASSERT(rc == 0); + LASSERT(!rc); ping_init_test_client(); ping_init_test_service(); rc = sfw_register_test(&ping_test_service, &ping_test_client); - LASSERT(rc == 0); + LASSERT(!rc); error = 0; list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) { @@ -1685,29 +1675,29 @@ sfw_startup(void) rc = srpc_add_service(sv); LASSERT(rc != -EBUSY); - if (rc != 0) { + if (rc) { CWARN("Failed to add %s service: %d\n", - sv->sv_name, rc); + sv->sv_name, rc); error = rc; } } for (i = 0; ; i++) { sv = &sfw_services[i]; - if (sv->sv_name == NULL) + if (!sv->sv_name) break; sv->sv_bulk_ready = NULL; - sv->sv_handler = sfw_handle_server_rpc; - sv->sv_wi_total = SFW_FRWK_WI_MAX; + sv->sv_handler = sfw_handle_server_rpc; + sv->sv_wi_total = SFW_FRWK_WI_MAX; if (sv->sv_id == SRPC_SERVICE_TEST) sv->sv_bulk_ready = sfw_bulk_ready; rc = srpc_add_service(sv); LASSERT(rc != -EBUSY); - if (rc != 0) { + if (rc) { CWARN("Failed to add %s service: %d\n", - sv->sv_name, rc); + sv->sv_name, rc); error = rc; } @@ -1716,14 +1706,14 @@ sfw_startup(void) continue; rc = srpc_service_add_buffers(sv, sv->sv_wi_total); - if (rc != 0) { + if (rc) { CWARN("Failed to reserve enough buffers: service %s, %d needed: %d\n", sv->sv_name, sv->sv_wi_total, rc); error = -ENOMEM; } } - if (error != 0) + if (error) sfw_shutdown(); return error; } @@ -1738,15 
+1728,15 @@ sfw_shutdown(void) spin_lock(&sfw_data.fw_lock); sfw_data.fw_shuttingdown = 1; - lst_wait_until(sfw_data.fw_active_srpc == NULL, sfw_data.fw_lock, + lst_wait_until(!sfw_data.fw_active_srpc, sfw_data.fw_lock, "waiting for active RPC to finish.\n"); - if (sfw_del_session_timer() != 0) - lst_wait_until(sfw_data.fw_session == NULL, sfw_data.fw_lock, + if (sfw_del_session_timer()) + lst_wait_until(!sfw_data.fw_session, sfw_data.fw_lock, "waiting for session timer to explode.\n"); sfw_deactivate_session(); - lst_wait_until(atomic_read(&sfw_data.fw_nzombies) == 0, + lst_wait_until(!atomic_read(&sfw_data.fw_nzombies), sfw_data.fw_lock, "waiting for %d zombie sessions to die.\n", atomic_read(&sfw_data.fw_nzombies)); @@ -1755,7 +1745,7 @@ sfw_shutdown(void) for (i = 0; ; i++) { sv = &sfw_services[i]; - if (sv->sv_name == NULL) + if (!sv->sv_name) break; srpc_shutdown_service(sv); @@ -1772,7 +1762,7 @@ sfw_shutdown(void) srpc_client_rpc_t *rpc; rpc = list_entry(sfw_data.fw_zombie_rpcs.next, - srpc_client_rpc_t, crpc_list); + srpc_client_rpc_t, crpc_list); list_del(&rpc->crpc_list); LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc)); @@ -1780,7 +1770,7 @@ sfw_shutdown(void) for (i = 0; ; i++) { sv = &sfw_services[i]; - if (sv->sv_name == NULL) + if (!sv->sv_name) break; srpc_wait_service_shutdown(sv); @@ -1788,13 +1778,11 @@ sfw_shutdown(void) while (!list_empty(&sfw_data.fw_tests)) { tsc = list_entry(sfw_data.fw_tests.next, - sfw_test_case_t, tsc_list); + sfw_test_case_t, tsc_list); srpc_wait_service_shutdown(tsc->tsc_srv_service); list_del(&tsc->tsc_list); LIBCFS_FREE(tsc, sizeof(*tsc)); } - - return; } diff --git a/drivers/staging/lustre/lnet/selftest/module.c b/drivers/staging/lustre/lnet/selftest/module.c index 46cbdf045..cc046b1d4 100644 --- a/drivers/staging/lustre/lnet/selftest/module.c +++ b/drivers/staging/lustre/lnet/selftest/module.c @@ -37,9 +37,10 @@ #define DEBUG_SUBSYSTEM S_LNET #include "selftest.h" +#include "console.h" enum { - LST_INIT_NONE = 0, + LST_INIT_NONE = 0, LST_INIT_WI_SERIAL, LST_INIT_WI_TEST, LST_INIT_RPC, @@ -47,16 +48,13 @@ enum { LST_INIT_CONSOLE }; -extern int lstcon_console_init(void); -extern int lstcon_console_fini(void); - static int lst_init_step = LST_INIT_NONE; struct cfs_wi_sched *lst_sched_serial; struct cfs_wi_sched **lst_sched_test; static void -lnet_selftest_fini(void) +lnet_selftest_exit(void) { int i; @@ -70,7 +68,7 @@ lnet_selftest_fini(void) case LST_INIT_WI_TEST: for (i = 0; i < cfs_cpt_number(lnet_cpt_table()); i++) { - if (lst_sched_test[i] == NULL) + if (!lst_sched_test[i]) continue; cfs_wi_sched_destroy(lst_sched_test[i]); } @@ -98,7 +96,7 @@ lnet_selftest_init(void) rc = cfs_wi_sched_create("lst_s", lnet_cpt_table(), CFS_CPT_ANY, 1, &lst_sched_serial); - if (rc != 0) { + if (rc) { CERROR("Failed to create serial WI scheduler for LST\n"); return rc; } @@ -106,7 +104,7 @@ lnet_selftest_init(void) nscheds = cfs_cpt_number(lnet_cpt_table()); LIBCFS_ALLOC(lst_sched_test, sizeof(lst_sched_test[0]) * nscheds); - if (lst_sched_test == NULL) + if (!lst_sched_test) goto error; lst_init_step = LST_INIT_WI_TEST; @@ -117,42 +115,42 @@ lnet_selftest_init(void) nthrs = max(nthrs - 1, 1); rc = cfs_wi_sched_create("lst_t", lnet_cpt_table(), i, nthrs, &lst_sched_test[i]); - if (rc != 0) { - CERROR("Failed to create CPT affinity WI scheduler %d for LST\n", - i); + if (rc) { + CERROR("Failed to create CPT affinity WI scheduler %d for LST\n", i); goto error; } } rc = srpc_startup(); - if (rc != 0) { + if (rc) { CERROR("LST can't startup rpc\n"); goto error; } 
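	/*
	 * lst_init_step records the last stage that completed; on any
	 * failure the error path calls lnet_selftest_exit(), whose
	 * switch falls through from that step and unwinds console,
	 * framework, RPC and workitem state in reverse order of
	 * initialisation.
	 */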
lst_init_step = LST_INIT_RPC; rc = sfw_startup(); - if (rc != 0) { + if (rc) { CERROR("LST can't startup framework\n"); goto error; } lst_init_step = LST_INIT_FW; rc = lstcon_console_init(); - if (rc != 0) { + if (rc) { CERROR("LST can't startup console\n"); goto error; } lst_init_step = LST_INIT_CONSOLE; return 0; error: - lnet_selftest_fini(); + lnet_selftest_exit(); return rc; } +MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>"); MODULE_DESCRIPTION("LNet Selftest"); +MODULE_VERSION("2.7.0"); MODULE_LICENSE("GPL"); -MODULE_VERSION("0.9.0"); module_init(lnet_selftest_init); -module_exit(lnet_selftest_fini); +module_exit(lnet_selftest_exit); diff --git a/drivers/staging/lustre/lnet/selftest/ping_test.c b/drivers/staging/lustre/lnet/selftest/ping_test.c index d42653654..81a45045e 100644 --- a/drivers/staging/lustre/lnet/selftest/ping_test.c +++ b/drivers/staging/lustre/lnet/selftest/ping_test.c @@ -42,18 +42,18 @@ #include "selftest.h" -#define LST_PING_TEST_MAGIC 0xbabeface +#define LST_PING_TEST_MAGIC 0xbabeface static int ping_srv_workitems = SFW_TEST_WI_MAX; module_param(ping_srv_workitems, int, 0644); MODULE_PARM_DESC(ping_srv_workitems, "# PING server workitems"); -typedef struct { +struct lst_ping_data { spinlock_t pnd_lock; /* serialize */ int pnd_counter; /* sequence counter */ -} lst_ping_data_t; +}; -static lst_ping_data_t lst_ping_data; +static struct lst_ping_data lst_ping_data; static int ping_client_init(sfw_test_instance_t *tsi) @@ -61,7 +61,7 @@ ping_client_init(sfw_test_instance_t *tsi) sfw_session_t *sn = tsi->tsi_batch->bat_session; LASSERT(tsi->tsi_is_client); - LASSERT(sn != NULL && (sn->sn_features & ~LST_FEATS_MASK) == 0); + LASSERT(sn && !(sn->sn_features & ~LST_FEATS_MASK)); spin_lock_init(&lst_ping_data.pnd_lock); lst_ping_data.pnd_counter = 0; @@ -75,7 +75,7 @@ ping_client_fini(sfw_test_instance_t *tsi) sfw_session_t *sn = tsi->tsi_batch->bat_session; int errors; - LASSERT(sn != NULL); + LASSERT(sn); LASSERT(tsi->tsi_is_client); errors = atomic_read(&sn->sn_ping_errors); @@ -95,11 +95,11 @@ ping_client_prep_rpc(sfw_test_unit_t *tsu, struct timespec64 ts; int rc; - LASSERT(sn != NULL); - LASSERT((sn->sn_features & ~LST_FEATS_MASK) == 0); + LASSERT(sn); + LASSERT(!(sn->sn_features & ~LST_FEATS_MASK)); rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, 0, 0, rpc); - if (rc != 0) + if (rc) return rc; req = &(*rpc)->crpc_reqstmsg.msg_body.ping_reqst; @@ -111,7 +111,7 @@ ping_client_prep_rpc(sfw_test_unit_t *tsu, spin_unlock(&lst_ping_data.pnd_lock); ktime_get_real_ts64(&ts); - req->pnr_time_sec = ts.tv_sec; + req->pnr_time_sec = ts.tv_sec; req->pnr_time_usec = ts.tv_nsec / NSEC_PER_USEC; return rc; @@ -126,14 +126,14 @@ ping_client_done_rpc(sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc) srpc_ping_reply_t *reply = &rpc->crpc_replymsg.msg_body.ping_reply; struct timespec64 ts; - LASSERT(sn != NULL); + LASSERT(sn); - if (rpc->crpc_status != 0) { + if (rpc->crpc_status) { if (!tsi->tsi_stopping) /* rpc could have been aborted */ atomic_inc(&sn->sn_ping_errors); CERROR("Unable to ping %s (%d): %d\n", - libcfs_id2str(rpc->crpc_dest), - reqst->pnr_seq, rpc->crpc_status); + libcfs_id2str(rpc->crpc_dest), + reqst->pnr_seq, rpc->crpc_status); return; } @@ -147,8 +147,8 @@ ping_client_done_rpc(sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc) rpc->crpc_status = -EBADMSG; atomic_inc(&sn->sn_ping_errors); CERROR("Bad magic %u from %s, %u expected.\n", - reply->pnr_magic, libcfs_id2str(rpc->crpc_dest), - LST_PING_TEST_MAGIC); + reply->pnr_magic, libcfs_id2str(rpc->crpc_dest), + 
LST_PING_TEST_MAGIC); return; } @@ -156,8 +156,8 @@ ping_client_done_rpc(sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc) rpc->crpc_status = -EBADMSG; atomic_inc(&sn->sn_ping_errors); CERROR("Bad seq %u from %s, %u expected.\n", - reply->pnr_seq, libcfs_id2str(rpc->crpc_dest), - reqst->pnr_seq); + reply->pnr_seq, libcfs_id2str(rpc->crpc_dest), + reqst->pnr_seq); return; } @@ -165,13 +165,12 @@ ping_client_done_rpc(sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc) CDEBUG(D_NET, "%d reply in %u usec\n", reply->pnr_seq, (unsigned)((ts.tv_sec - reqst->pnr_time_sec) * 1000000 + (ts.tv_nsec / NSEC_PER_USEC - reqst->pnr_time_usec))); - return; } static int ping_server_handle(struct srpc_server_rpc *rpc) { - struct srpc_service *sv = rpc->srpc_scd->scd_svc; + struct srpc_service *sv = rpc->srpc_scd->scd_svc; srpc_msg_t *reqstmsg = &rpc->srpc_reqstbuf->buf_msg; srpc_msg_t *replymsg = &rpc->srpc_replymsg; srpc_ping_reqst_t *req = &reqstmsg->msg_body.ping_reqst; @@ -191,14 +190,14 @@ ping_server_handle(struct srpc_server_rpc *rpc) if (req->pnr_magic != LST_PING_TEST_MAGIC) { CERROR("Unexpected magic %08x from %s\n", - req->pnr_magic, libcfs_id2str(rpc->srpc_peer)); + req->pnr_magic, libcfs_id2str(rpc->srpc_peer)); return -EINVAL; } - rep->pnr_seq = req->pnr_seq; + rep->pnr_seq = req->pnr_seq; rep->pnr_magic = LST_PING_TEST_MAGIC; - if ((reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) != 0) { + if (reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) { replymsg->msg_ses_feats = LST_FEATS_MASK; rep->pnr_status = EPROTO; return 0; @@ -214,8 +213,8 @@ ping_server_handle(struct srpc_server_rpc *rpc) sfw_test_client_ops_t ping_test_client; void ping_init_test_client(void) { - ping_test_client.tso_init = ping_client_init; - ping_test_client.tso_fini = ping_client_fini; + ping_test_client.tso_init = ping_client_init; + ping_test_client.tso_fini = ping_client_fini; ping_test_client.tso_prep_rpc = ping_client_prep_rpc; ping_test_client.tso_done_rpc = ping_client_done_rpc; } @@ -223,8 +222,8 @@ void ping_init_test_client(void) srpc_service_t ping_test_service; void ping_init_test_service(void) { - ping_test_service.sv_id = SRPC_SERVICE_PING; - ping_test_service.sv_name = "ping_test"; - ping_test_service.sv_handler = ping_server_handle; + ping_test_service.sv_id = SRPC_SERVICE_PING; + ping_test_service.sv_name = "ping_test"; + ping_test_service.sv_handler = ping_server_handle; ping_test_service.sv_wi_total = ping_srv_workitems; } diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c index 2acf6ec71..7d7748d96 100644 --- a/drivers/staging/lustre/lnet/selftest/rpc.c +++ b/drivers/staging/lustre/lnet/selftest/rpc.c @@ -90,14 +90,14 @@ void srpc_set_counters(const srpc_counters_t *cnt) static int srpc_add_bulk_page(srpc_bulk_t *bk, struct page *pg, int i, int nob) { - nob = min_t(int, nob, PAGE_CACHE_SIZE); + nob = min_t(int, nob, PAGE_SIZE); LASSERT(nob > 0); LASSERT(i >= 0 && i < bk->bk_niov); bk->bk_iovs[i].kiov_offset = 0; - bk->bk_iovs[i].kiov_page = pg; - bk->bk_iovs[i].kiov_len = nob; + bk->bk_iovs[i].kiov_page = pg; + bk->bk_iovs[i].kiov_len = nob; return nob; } @@ -107,18 +107,17 @@ srpc_free_bulk(srpc_bulk_t *bk) int i; struct page *pg; - LASSERT(bk != NULL); + LASSERT(bk); for (i = 0; i < bk->bk_niov; i++) { pg = bk->bk_iovs[i].kiov_page; - if (pg == NULL) + if (!pg) break; __free_page(pg); } LIBCFS_FREE(bk, offsetof(srpc_bulk_t, bk_iovs[bk->bk_niov])); - return; } srpc_bulk_t * @@ -131,15 +130,15 @@ srpc_alloc_bulk(int cpt, unsigned bulk_npg, unsigned bulk_len, int sink) 
LIBCFS_CPT_ALLOC(bk, lnet_cpt_table(), cpt, offsetof(srpc_bulk_t, bk_iovs[bulk_npg])); - if (bk == NULL) { + if (!bk) { CERROR("Can't allocate descriptor for %d pages\n", bulk_npg); return NULL; } memset(bk, 0, offsetof(srpc_bulk_t, bk_iovs[bulk_npg])); - bk->bk_sink = sink; - bk->bk_len = bulk_len; - bk->bk_niov = bulk_npg; + bk->bk_sink = sink; + bk->bk_len = bulk_len; + bk->bk_niov = bulk_npg; for (i = 0; i < bulk_npg; i++) { struct page *pg; @@ -147,7 +146,7 @@ srpc_alloc_bulk(int cpt, unsigned bulk_npg, unsigned bulk_len, int sink) pg = alloc_pages_node(cfs_cpt_spread_node(lnet_cpt_table(), cpt), GFP_KERNEL, 0); - if (pg == NULL) { + if (!pg) { CERROR("Can't allocate page %d of %d\n", i, bulk_npg); srpc_free_bulk(bk); return NULL; @@ -183,10 +182,10 @@ srpc_init_server_rpc(struct srpc_server_rpc *rpc, rpc->srpc_ev.ev_fired = 1; /* no event expected now */ - rpc->srpc_scd = scd; + rpc->srpc_scd = scd; rpc->srpc_reqstbuf = buffer; - rpc->srpc_peer = buffer->buf_peer; - rpc->srpc_self = buffer->buf_self; + rpc->srpc_peer = buffer->buf_peer; + rpc->srpc_self = buffer->buf_self; LNetInvalidateHandle(&rpc->srpc_replymdh); } @@ -199,7 +198,7 @@ srpc_service_fini(struct srpc_service *svc) struct list_head *q; int i; - if (svc->sv_cpt_data == NULL) + if (!svc->sv_cpt_data) return; cfs_percpt_for_each(scd, i, svc->sv_cpt_data) { @@ -212,9 +211,8 @@ srpc_service_fini(struct srpc_service *svc) break; while (!list_empty(q)) { - buf = list_entry(q->next, - struct srpc_buffer, - buf_list); + buf = list_entry(q->next, struct srpc_buffer, + buf_list); list_del(&buf->buf_list); LIBCFS_FREE(buf, sizeof(*buf)); } @@ -224,8 +222,8 @@ srpc_service_fini(struct srpc_service *svc) while (!list_empty(&scd->scd_rpc_free)) { rpc = list_entry(scd->scd_rpc_free.next, - struct srpc_server_rpc, - srpc_list); + struct srpc_server_rpc, + srpc_list); list_del(&rpc->srpc_list); LIBCFS_FREE(rpc, sizeof(*rpc)); } @@ -259,7 +257,7 @@ srpc_service_init(struct srpc_service *svc) svc->sv_cpt_data = cfs_percpt_alloc(lnet_cpt_table(), sizeof(struct srpc_service_cd)); - if (svc->sv_cpt_data == NULL) + if (!svc->sv_cpt_data) return -ENOMEM; svc->sv_ncpts = srpc_serv_is_framework(svc) ? 
@@ -278,23 +276,27 @@ srpc_service_init(struct srpc_service *svc) scd->scd_ev.ev_data = scd; scd->scd_ev.ev_type = SRPC_REQUEST_RCVD; - /* NB: don't use lst_sched_serial for adding buffer, - * see details in srpc_service_add_buffers() */ + /* + * NB: don't use lst_sched_serial for adding buffer, + * see details in srpc_service_add_buffers() + */ swi_init_workitem(&scd->scd_buf_wi, scd, srpc_add_buffer, lst_sched_test[i]); - if (i != 0 && srpc_serv_is_framework(svc)) { - /* NB: framework service only needs srpc_service_cd for + if (i && srpc_serv_is_framework(svc)) { + /* + * NB: framework service only needs srpc_service_cd for * one partition, but we allocate for all to make * it easier to implement, it will waste a little - * memory but nobody should care about this */ + * memory but nobody should care about this + */ continue; } for (j = 0; j < nrpcs; j++) { LIBCFS_CPT_ALLOC(rpc, lnet_cpt_table(), i, sizeof(*rpc)); - if (rpc == NULL) { + if (!rpc) { srpc_service_fini(svc); return -ENOMEM; } @@ -312,14 +314,14 @@ srpc_add_service(struct srpc_service *sv) LASSERT(0 <= id && id <= SRPC_SERVICE_MAX_ID); - if (srpc_service_init(sv) != 0) + if (srpc_service_init(sv)) return -ENOMEM; spin_lock(&srpc_data.rpc_glock); LASSERT(srpc_data.rpc_state == SRPC_STATE_RUNNING); - if (srpc_data.rpc_services[id] != NULL) { + if (srpc_data.rpc_services[id]) { spin_unlock(&srpc_data.rpc_glock); goto failed; } @@ -363,32 +365,31 @@ srpc_post_passive_rdma(int portal, int local, __u64 matchbits, void *buf, rc = LNetMEAttach(portal, peer, matchbits, 0, LNET_UNLINK, local ? LNET_INS_LOCAL : LNET_INS_AFTER, &meh); - if (rc != 0) { + if (rc) { CERROR("LNetMEAttach failed: %d\n", rc); LASSERT(rc == -ENOMEM); return -ENOMEM; } md.threshold = 1; - md.user_ptr = ev; - md.start = buf; - md.length = len; - md.options = options; + md.user_ptr = ev; + md.start = buf; + md.length = len; + md.options = options; md.eq_handle = srpc_data.rpc_lnet_eq; rc = LNetMDAttach(meh, md, LNET_UNLINK, mdh); - if (rc != 0) { + if (rc) { CERROR("LNetMDAttach failed: %d\n", rc); LASSERT(rc == -ENOMEM); rc = LNetMEUnlink(meh); - LASSERT(rc == 0); + LASSERT(!rc); return -ENOMEM; } - CDEBUG(D_NET, - "Posted passive RDMA: peer %s, portal %d, matchbits %#llx\n", - libcfs_id2str(peer), portal, matchbits); + CDEBUG(D_NET, "Posted passive RDMA: peer %s, portal %d, matchbits %#llx\n", + libcfs_id2str(peer), portal, matchbits); return 0; } @@ -400,46 +401,48 @@ srpc_post_active_rdma(int portal, __u64 matchbits, void *buf, int len, int rc; lnet_md_t md; - md.user_ptr = ev; - md.start = buf; - md.length = len; + md.user_ptr = ev; + md.start = buf; + md.length = len; md.eq_handle = srpc_data.rpc_lnet_eq; - md.threshold = ((options & LNET_MD_OP_GET) != 0) ? 2 : 1; - md.options = options & ~(LNET_MD_OP_PUT | LNET_MD_OP_GET); + md.threshold = options & LNET_MD_OP_GET ? 2 : 1; + md.options = options & ~(LNET_MD_OP_PUT | LNET_MD_OP_GET); rc = LNetMDBind(md, LNET_UNLINK, mdh); - if (rc != 0) { + if (rc) { CERROR("LNetMDBind failed: %d\n", rc); LASSERT(rc == -ENOMEM); return -ENOMEM; } - /* this is kind of an abuse of the LNET_MD_OP_{PUT,GET} options. + /* + * this is kind of an abuse of the LNET_MD_OP_{PUT,GET} options. * they're only meaningful for MDs attached to an ME (i.e. passive - * buffers... */ - if ((options & LNET_MD_OP_PUT) != 0) { + * buffers... 
+ */ + if (options & LNET_MD_OP_PUT) { rc = LNetPut(self, *mdh, LNET_NOACK_REQ, peer, portal, matchbits, 0, 0); } else { - LASSERT((options & LNET_MD_OP_GET) != 0); + LASSERT(options & LNET_MD_OP_GET); rc = LNetGet(self, *mdh, peer, portal, matchbits, 0); } - if (rc != 0) { + if (rc) { CERROR("LNet%s(%s, %d, %lld) failed: %d\n", - ((options & LNET_MD_OP_PUT) != 0) ? "Put" : "Get", - libcfs_id2str(peer), portal, matchbits, rc); + options & LNET_MD_OP_PUT ? "Put" : "Get", + libcfs_id2str(peer), portal, matchbits, rc); - /* The forthcoming unlink event will complete this operation + /* + * The forthcoming unlink event will complete this operation * with failure, so fall through and return success here. */ rc = LNetMDUnlink(*mdh); - LASSERT(rc == 0); + LASSERT(!rc); } else { - CDEBUG(D_NET, - "Posted active RDMA: peer %s, portal %u, matchbits %#llx\n", - libcfs_id2str(peer), portal, matchbits); + CDEBUG(D_NET, "Posted active RDMA: peer %s, portal %u, matchbits %#llx\n", + libcfs_id2str(peer), portal, matchbits); } return 0; } @@ -448,7 +451,7 @@ static int srpc_post_passive_rqtbuf(int service, int local, void *buf, int len, lnet_handle_md_t *mdh, srpc_event_t *ev) { - lnet_process_id_t any = {0}; + lnet_process_id_t any = { 0 }; any.nid = LNET_NID_ANY; any.pid = LNET_PID_ANY; @@ -460,10 +463,10 @@ srpc_post_passive_rqtbuf(int service, int local, void *buf, int len, static int srpc_service_post_buffer(struct srpc_service_cd *scd, struct srpc_buffer *buf) - __must_hold(&scd->scd_lock) +__must_hold(&scd->scd_lock) { struct srpc_service *sv = scd->scd_svc; - struct srpc_msg *msg = &buf->buf_msg; + struct srpc_msg *msg = &buf->buf_msg; int rc; LNetInvalidateHandle(&buf->buf_mdh); @@ -476,19 +479,22 @@ srpc_service_post_buffer(struct srpc_service_cd *scd, struct srpc_buffer *buf) msg, sizeof(*msg), &buf->buf_mdh, &scd->scd_ev); - /* At this point, a RPC (new or delayed) may have arrived in + /* + * At this point, a RPC (new or delayed) may have arrived in * msg and its event handler has been called. 
So we must add - * buf to scd_buf_posted _before_ dropping scd_lock */ - + * buf to scd_buf_posted _before_ dropping scd_lock + */ spin_lock(&scd->scd_lock); - if (rc == 0) { + if (!rc) { if (!sv->sv_shuttingdown) return 0; spin_unlock(&scd->scd_lock); - /* srpc_shutdown_service might have tried to unlink me - * when my buf_mdh was still invalid */ + /* + * srpc_shutdown_service might have tried to unlink me + * when my buf_mdh was still invalid + */ LNetMDUnlink(buf->buf_mdh); spin_lock(&scd->scd_lock); return 0; @@ -514,9 +520,11 @@ srpc_add_buffer(struct swi_workitem *wi) struct srpc_buffer *buf; int rc = 0; - /* it's called by workitem scheduler threads, these threads + /* + * it's called by workitem scheduler threads, these threads * should have been set CPT affinity, so buffers will be posted - * on CPT local list of Portal */ + * on CPT local list of Portal + */ spin_lock(&scd->scd_lock); while (scd->scd_buf_adjust > 0 && @@ -527,7 +535,7 @@ srpc_add_buffer(struct swi_workitem *wi) spin_unlock(&scd->scd_lock); LIBCFS_ALLOC(buf, sizeof(*buf)); - if (buf == NULL) { + if (!buf) { CERROR("Failed to add new buf to service: %s\n", scd->scd_svc->sv_name); spin_lock(&scd->scd_lock); @@ -546,7 +554,7 @@ srpc_add_buffer(struct swi_workitem *wi) } rc = srpc_service_post_buffer(scd, buf); - if (rc != 0) + if (rc) break; /* buf has been freed inside */ LASSERT(scd->scd_buf_posting > 0); @@ -555,7 +563,7 @@ srpc_add_buffer(struct swi_workitem *wi) scd->scd_buf_low = max(2, scd->scd_buf_total / 4); } - if (rc != 0) { + if (rc) { scd->scd_buf_err_stamp = ktime_get_real_seconds(); scd->scd_buf_err = rc; @@ -607,12 +615,12 @@ srpc_service_add_buffers(struct srpc_service *sv, int nbuffer) * block all WIs pending on lst_sched_serial for a moment * which is not good but not fatal. 
*/ - lst_wait_until(scd->scd_buf_err != 0 || - (scd->scd_buf_adjust == 0 && - scd->scd_buf_posting == 0), + lst_wait_until(scd->scd_buf_err || + (!scd->scd_buf_adjust && + !scd->scd_buf_posting), scd->scd_lock, "waiting for adding buffer\n"); - if (scd->scd_buf_err != 0 && rc == 0) + if (scd->scd_buf_err && !rc) rc = scd->scd_buf_err; spin_unlock(&scd->scd_lock); @@ -658,7 +666,7 @@ srpc_finish_service(struct srpc_service *sv) } if (scd->scd_buf_nposted > 0) { - CDEBUG(D_NET, "waiting for %d posted buffers to unlink", + CDEBUG(D_NET, "waiting for %d posted buffers to unlink\n", scd->scd_buf_nposted); spin_unlock(&scd->scd_lock); return 0; @@ -670,7 +678,7 @@ srpc_finish_service(struct srpc_service *sv) } rpc = list_entry(scd->scd_rpc_active.next, - struct srpc_server_rpc, srpc_list); + struct srpc_server_rpc, srpc_list); CNETERR("Active RPC %p on shutdown: sv %s, peer %s, wi %s scheduled %d running %d, ev fired %d type %d status %d lnet %d\n", rpc, sv->sv_name, libcfs_id2str(rpc->srpc_peer), swi_state2str(rpc->srpc_wi.swi_state), @@ -690,10 +698,10 @@ srpc_finish_service(struct srpc_service *sv) /* called with sv->sv_lock held */ static void srpc_service_recycle_buffer(struct srpc_service_cd *scd, srpc_buffer_t *buf) - __must_hold(&scd->scd_lock) +__must_hold(&scd->scd_lock) { if (!scd->scd_svc->sv_shuttingdown && scd->scd_buf_adjust >= 0) { - if (srpc_service_post_buffer(scd, buf) != 0) { + if (srpc_service_post_buffer(scd, buf)) { CWARN("Failed to post %s buffer\n", scd->scd_svc->sv_name); } @@ -706,7 +714,7 @@ srpc_service_recycle_buffer(struct srpc_service_cd *scd, srpc_buffer_t *buf) if (scd->scd_buf_adjust < 0) { scd->scd_buf_adjust++; if (scd->scd_buf_adjust < 0 && - scd->scd_buf_total == 0 && scd->scd_buf_posting == 0) { + !scd->scd_buf_total && !scd->scd_buf_posting) { CDEBUG(D_INFO, "Try to recycle %d buffers but nothing left\n", scd->scd_buf_adjust); @@ -732,9 +740,11 @@ srpc_abort_service(struct srpc_service *sv) cfs_percpt_for_each(scd, i, sv->sv_cpt_data) { spin_lock(&scd->scd_lock); - /* schedule in-flight RPCs to notice the abort, NB: + /* + * schedule in-flight RPCs to notice the abort, NB: * racing with incoming RPCs; complete fix should make test - * RPCs carry session ID in its headers */ + * RPCs carry session ID in its headers + */ list_for_each_entry(rpc, &scd->scd_rpc_active, srpc_list) { rpc->srpc_aborted = 1; swi_schedule_workitem(&rpc->srpc_wi); @@ -772,8 +782,10 @@ srpc_shutdown_service(srpc_service_t *sv) spin_unlock(&scd->scd_lock); - /* OK to traverse scd_buf_posted without lock, since no one - * touches scd_buf_posted now */ + /* + * OK to traverse scd_buf_posted without lock, since no one + * touches scd_buf_posted now + */ list_for_each_entry(buf, &scd->scd_buf_posted, buf_list) LNetMDUnlink(buf->buf_mdh); } @@ -786,15 +798,15 @@ srpc_send_request(srpc_client_rpc_t *rpc) int rc; ev->ev_fired = 0; - ev->ev_data = rpc; - ev->ev_type = SRPC_REQUEST_SENT; + ev->ev_data = rpc; + ev->ev_type = SRPC_REQUEST_SENT; rc = srpc_post_active_rdma(srpc_serv_portal(rpc->crpc_service), rpc->crpc_service, &rpc->crpc_reqstmsg, sizeof(srpc_msg_t), LNET_MD_OP_PUT, rpc->crpc_dest, LNET_NID_ANY, &rpc->crpc_reqstmdh, ev); - if (rc != 0) { + if (rc) { LASSERT(rc == -ENOMEM); ev->ev_fired = 1; /* no more event expected */ } @@ -809,8 +821,8 @@ srpc_prepare_reply(srpc_client_rpc_t *rpc) int rc; ev->ev_fired = 0; - ev->ev_data = rpc; - ev->ev_type = SRPC_REPLY_RCVD; + ev->ev_data = rpc; + ev->ev_type = SRPC_REPLY_RCVD; *id = srpc_next_id(); @@ -818,7 +830,7 @@ 
srpc_prepare_reply(srpc_client_rpc_t *rpc) &rpc->crpc_replymsg, sizeof(srpc_msg_t), LNET_MD_OP_PUT, rpc->crpc_dest, &rpc->crpc_replymdh, ev); - if (rc != 0) { + if (rc) { LASSERT(rc == -ENOMEM); ev->ev_fired = 1; /* no more event expected */ } @@ -830,28 +842,28 @@ srpc_prepare_bulk(srpc_client_rpc_t *rpc) { srpc_bulk_t *bk = &rpc->crpc_bulk; srpc_event_t *ev = &rpc->crpc_bulkev; - __u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.bulkid; + __u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.bulkid; int rc; int opt; LASSERT(bk->bk_niov <= LNET_MAX_IOV); - if (bk->bk_niov == 0) + if (!bk->bk_niov) return 0; /* nothing to do */ opt = bk->bk_sink ? LNET_MD_OP_PUT : LNET_MD_OP_GET; opt |= LNET_MD_KIOV; ev->ev_fired = 0; - ev->ev_data = rpc; - ev->ev_type = SRPC_BULK_REQ_RCVD; + ev->ev_data = rpc; + ev->ev_type = SRPC_BULK_REQ_RCVD; *id = srpc_next_id(); rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, 0, *id, &bk->bk_iovs[0], bk->bk_niov, opt, rpc->crpc_dest, &bk->bk_mdh, ev); - if (rc != 0) { + if (rc) { LASSERT(rc == -ENOMEM); ev->ev_fired = 1; /* no more event expected */ } @@ -867,20 +879,20 @@ srpc_do_bulk(struct srpc_server_rpc *rpc) int rc; int opt; - LASSERT(bk != NULL); + LASSERT(bk); opt = bk->bk_sink ? LNET_MD_OP_GET : LNET_MD_OP_PUT; opt |= LNET_MD_KIOV; ev->ev_fired = 0; - ev->ev_data = rpc; - ev->ev_type = bk->bk_sink ? SRPC_BULK_GET_RPLD : SRPC_BULK_PUT_SENT; + ev->ev_data = rpc; + ev->ev_type = bk->bk_sink ? SRPC_BULK_GET_RPLD : SRPC_BULK_PUT_SENT; rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, id, &bk->bk_iovs[0], bk->bk_niov, opt, rpc->srpc_peer, rpc->srpc_self, &bk->bk_mdh, ev); - if (rc != 0) + if (rc) ev->ev_fired = 1; /* no more event expected */ return rc; } @@ -890,33 +902,35 @@ static void srpc_server_rpc_done(struct srpc_server_rpc *rpc, int status) { struct srpc_service_cd *scd = rpc->srpc_scd; - struct srpc_service *sv = scd->scd_svc; + struct srpc_service *sv = scd->scd_svc; srpc_buffer_t *buffer; - LASSERT(status != 0 || rpc->srpc_wi.swi_state == SWI_STATE_DONE); + LASSERT(status || rpc->srpc_wi.swi_state == SWI_STATE_DONE); rpc->srpc_status = status; - CDEBUG_LIMIT(status == 0 ? D_NET : D_NETERROR, - "Server RPC %p done: service %s, peer %s, status %s:%d\n", - rpc, sv->sv_name, libcfs_id2str(rpc->srpc_peer), - swi_state2str(rpc->srpc_wi.swi_state), status); + CDEBUG_LIMIT(!status ? 
D_NET : D_NETERROR, + "Server RPC %p done: service %s, peer %s, status %s:%d\n", + rpc, sv->sv_name, libcfs_id2str(rpc->srpc_peer), + swi_state2str(rpc->srpc_wi.swi_state), status); - if (status != 0) { + if (status) { spin_lock(&srpc_data.rpc_glock); srpc_data.rpc_counters.rpcs_dropped++; spin_unlock(&srpc_data.rpc_glock); } - if (rpc->srpc_done != NULL) + if (rpc->srpc_done) (*rpc->srpc_done) (rpc); - LASSERT(rpc->srpc_bulk == NULL); + LASSERT(!rpc->srpc_bulk); spin_lock(&scd->scd_lock); - if (rpc->srpc_reqstbuf != NULL) { - /* NB might drop sv_lock in srpc_service_recycle_buffer, but - * sv won't go away for scd_rpc_active must not be empty */ + if (rpc->srpc_reqstbuf) { + /* + * NB might drop sv_lock in srpc_service_recycle_buffer, but + * sv won't go away for scd_rpc_active must not be empty + */ srpc_service_recycle_buffer(scd, rpc->srpc_reqstbuf); rpc->srpc_reqstbuf = NULL; } @@ -934,7 +948,7 @@ srpc_server_rpc_done(struct srpc_server_rpc *rpc, int status) if (!sv->sv_shuttingdown && !list_empty(&scd->scd_buf_blocked)) { buffer = list_entry(scd->scd_buf_blocked.next, - srpc_buffer_t, buf_list); + srpc_buffer_t, buf_list); list_del(&buffer->buf_list); srpc_init_server_rpc(rpc, scd, buffer); @@ -945,7 +959,6 @@ srpc_server_rpc_done(struct srpc_server_rpc *rpc, int status) } spin_unlock(&scd->scd_lock); - return; } /* handles an incoming RPC */ @@ -965,7 +978,7 @@ srpc_handle_rpc(swi_workitem_t *wi) if (sv->sv_shuttingdown || rpc->srpc_aborted) { spin_unlock(&scd->scd_lock); - if (rpc->srpc_bulk != NULL) + if (rpc->srpc_bulk) LNetMDUnlink(rpc->srpc_bulk->bk_mdh); LNetMDUnlink(rpc->srpc_replymdh); @@ -988,7 +1001,7 @@ srpc_handle_rpc(swi_workitem_t *wi) msg = &rpc->srpc_reqstbuf->buf_msg; reply = &rpc->srpc_replymsg.msg_body.reply; - if (msg->msg_magic == 0) { + if (!msg->msg_magic) { /* moaned already in srpc_lnet_ev_handler */ srpc_server_rpc_done(rpc, EBADMSG); return 1; @@ -1004,8 +1017,8 @@ srpc_handle_rpc(swi_workitem_t *wi) } else { reply->status = 0; rc = (*sv->sv_handler)(rpc); - LASSERT(reply->status == 0 || !rpc->srpc_bulk); - if (rc != 0) { + LASSERT(!reply->status || !rpc->srpc_bulk); + if (rc) { srpc_server_rpc_done(rpc, rc); return 1; } @@ -1013,9 +1026,9 @@ srpc_handle_rpc(swi_workitem_t *wi) wi->swi_state = SWI_STATE_BULK_STARTED; - if (rpc->srpc_bulk != NULL) { + if (rpc->srpc_bulk) { rc = srpc_do_bulk(rpc); - if (rc == 0) + if (!rc) return 0; /* wait for bulk */ LASSERT(ev->ev_fired); @@ -1023,15 +1036,15 @@ srpc_handle_rpc(swi_workitem_t *wi) } } case SWI_STATE_BULK_STARTED: - LASSERT(rpc->srpc_bulk == NULL || ev->ev_fired); + LASSERT(!rpc->srpc_bulk || ev->ev_fired); - if (rpc->srpc_bulk != NULL) { + if (rpc->srpc_bulk) { rc = ev->ev_status; - if (sv->sv_bulk_ready != NULL) + if (sv->sv_bulk_ready) rc = (*sv->sv_bulk_ready) (rpc, rc); - if (rc != 0) { + if (rc) { srpc_server_rpc_done(rpc, rc); return 1; } @@ -1039,7 +1052,7 @@ srpc_handle_rpc(swi_workitem_t *wi) wi->swi_state = SWI_STATE_REPLY_SUBMITTED; rc = srpc_send_reply(rpc); - if (rc == 0) + if (!rc) return 0; /* wait for reply */ srpc_server_rpc_done(rpc, rc); return 1; @@ -1067,8 +1080,8 @@ srpc_client_rpc_expired(void *data) srpc_client_rpc_t *rpc = data; CWARN("Client RPC expired: service %d, peer %s, timeout %d.\n", - rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), - rpc->crpc_timeout); + rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), + rpc->crpc_timeout); spin_lock(&rpc->crpc_lock); @@ -1082,32 +1095,32 @@ srpc_client_rpc_expired(void *data) spin_unlock(&srpc_data.rpc_glock); } -inline void +static 
void srpc_add_client_rpc_timer(srpc_client_rpc_t *rpc) { - stt_timer_t *timer = &rpc->crpc_timer; + struct stt_timer *timer = &rpc->crpc_timer; - if (rpc->crpc_timeout == 0) + if (!rpc->crpc_timeout) return; INIT_LIST_HEAD(&timer->stt_list); - timer->stt_data = rpc; - timer->stt_func = srpc_client_rpc_expired; + timer->stt_data = rpc; + timer->stt_func = srpc_client_rpc_expired; timer->stt_expires = ktime_get_real_seconds() + rpc->crpc_timeout; stt_add_timer(timer); - return; } /* * Called with rpc->crpc_lock held. * * Upon exit the RPC expiry timer is not queued and the handler is not - * running on any CPU. */ + * running on any CPU. + */ static void srpc_del_client_rpc_timer(srpc_client_rpc_t *rpc) { /* timer not planted or already exploded */ - if (rpc->crpc_timeout == 0) + if (!rpc->crpc_timeout) return; /* timer successfully defused */ @@ -1115,7 +1128,7 @@ srpc_del_client_rpc_timer(srpc_client_rpc_t *rpc) return; /* timer detonated, wait for it to explode */ - while (rpc->crpc_timeout != 0) { + while (rpc->crpc_timeout) { spin_unlock(&rpc->crpc_lock); schedule(); @@ -1129,20 +1142,20 @@ srpc_client_rpc_done(srpc_client_rpc_t *rpc, int status) { swi_workitem_t *wi = &rpc->crpc_wi; - LASSERT(status != 0 || wi->swi_state == SWI_STATE_DONE); + LASSERT(status || wi->swi_state == SWI_STATE_DONE); spin_lock(&rpc->crpc_lock); rpc->crpc_closed = 1; - if (rpc->crpc_status == 0) + if (!rpc->crpc_status) rpc->crpc_status = status; srpc_del_client_rpc_timer(rpc); - CDEBUG_LIMIT((status == 0) ? D_NET : D_NETERROR, - "Client RPC done: service %d, peer %s, status %s:%d:%d\n", - rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), - swi_state2str(wi->swi_state), rpc->crpc_aborted, status); + CDEBUG_LIMIT(!status ? D_NET : D_NETERROR, + "Client RPC done: service %d, peer %s, status %s:%d:%d\n", + rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), + swi_state2str(wi->swi_state), rpc->crpc_aborted, status); /* * No one can schedule me now since: @@ -1158,7 +1171,6 @@ srpc_client_rpc_done(srpc_client_rpc_t *rpc, int status) spin_unlock(&rpc->crpc_lock); (*rpc->crpc_done)(rpc); - return; } /* sends an outgoing RPC */ @@ -1170,11 +1182,11 @@ srpc_send_rpc(swi_workitem_t *wi) srpc_msg_t *reply; int do_bulk; - LASSERT(wi != NULL); + LASSERT(wi); rpc = wi->swi_workitem.wi_data; - LASSERT(rpc != NULL); + LASSERT(rpc); LASSERT(wi == &rpc->crpc_wi); reply = &rpc->crpc_replymsg; @@ -1196,13 +1208,13 @@ srpc_send_rpc(swi_workitem_t *wi) LASSERT(!srpc_event_pending(rpc)); rc = srpc_prepare_reply(rpc); - if (rc != 0) { + if (rc) { srpc_client_rpc_done(rpc, rc); return 1; } rc = srpc_prepare_bulk(rpc); - if (rc != 0) + if (rc) break; wi->swi_state = SWI_STATE_REQUEST_SUBMITTED; @@ -1210,14 +1222,16 @@ srpc_send_rpc(swi_workitem_t *wi) break; case SWI_STATE_REQUEST_SUBMITTED: - /* CAVEAT EMPTOR: rqtev, rpyev, and bulkev may come in any + /* + * CAVEAT EMPTOR: rqtev, rpyev, and bulkev may come in any * order; however, they're processed in a strict order: - * rqt, rpy, and bulk. */ + * rqt, rpy, and bulk. 
+ */ if (!rpc->crpc_reqstev.ev_fired) break; rc = rpc->crpc_reqstev.ev_status; - if (rc != 0) + if (rc) break; wi->swi_state = SWI_STATE_REQUEST_SENT; @@ -1229,7 +1243,7 @@ srpc_send_rpc(swi_workitem_t *wi) break; rc = rpc->crpc_replyev.ev_status; - if (rc != 0) + if (rc) break; srpc_unpack_msg_hdr(reply); @@ -1244,7 +1258,7 @@ srpc_send_rpc(swi_workitem_t *wi) break; } - if (do_bulk && reply->msg_body.reply.status != 0) { + if (do_bulk && reply->msg_body.reply.status) { CWARN("Remote error %d at %s, unlink bulk buffer in case peer didn't initiate bulk transfer\n", reply->msg_body.reply.status, libcfs_id2str(rpc->crpc_dest)); @@ -1259,12 +1273,14 @@ srpc_send_rpc(swi_workitem_t *wi) rc = do_bulk ? rpc->crpc_bulkev.ev_status : 0; - /* Bulk buffer was unlinked due to remote error. Clear error + /* + * Bulk buffer was unlinked due to remote error. Clear error * since reply buffer still contains valid data. * NB rpc->crpc_done shouldn't look into bulk data in case of - * remote error. */ + * remote error. + */ if (do_bulk && rpc->crpc_bulkev.ev_lnet == LNET_EVENT_UNLINK && - rpc->crpc_status == 0 && reply->msg_body.reply.status != 0) + !rpc->crpc_status && reply->msg_body.reply.status) rc = 0; wi->swi_state = SWI_STATE_DONE; @@ -1272,7 +1288,7 @@ srpc_send_rpc(swi_workitem_t *wi) return 1; } - if (rc != 0) { + if (rc) { spin_lock(&rpc->crpc_lock); srpc_abort_rpc(rpc, rc); spin_unlock(&rpc->crpc_lock); @@ -1294,15 +1310,15 @@ abort: srpc_client_rpc_t * srpc_create_client_rpc(lnet_process_id_t peer, int service, - int nbulkiov, int bulklen, - void (*rpc_done)(srpc_client_rpc_t *), - void (*rpc_fini)(srpc_client_rpc_t *), void *priv) + int nbulkiov, int bulklen, + void (*rpc_done)(srpc_client_rpc_t *), + void (*rpc_fini)(srpc_client_rpc_t *), void *priv) { srpc_client_rpc_t *rpc; LIBCFS_ALLOC(rpc, offsetof(srpc_client_rpc_t, crpc_bulk.bk_iovs[nbulkiov])); - if (rpc == NULL) + if (!rpc) return NULL; srpc_init_client_rpc(rpc, peer, service, nbulkiov, @@ -1314,21 +1330,19 @@ srpc_create_client_rpc(lnet_process_id_t peer, int service, void srpc_abort_rpc(srpc_client_rpc_t *rpc, int why) { - LASSERT(why != 0); + LASSERT(why); if (rpc->crpc_aborted || /* already aborted */ - rpc->crpc_closed) /* callback imminent */ + rpc->crpc_closed) /* callback imminent */ return; - CDEBUG(D_NET, - "Aborting RPC: service %d, peer %s, state %s, why %d\n", - rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), - swi_state2str(rpc->crpc_wi.swi_state), why); + CDEBUG(D_NET, "Aborting RPC: service %d, peer %s, state %s, why %d\n", + rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), + swi_state2str(rpc->crpc_wi.swi_state), why); rpc->crpc_aborted = 1; - rpc->crpc_status = why; + rpc->crpc_status = why; swi_schedule_workitem(&rpc->crpc_wi); - return; } /* called with rpc->crpc_lock held */ @@ -1339,12 +1353,11 @@ srpc_post_rpc(srpc_client_rpc_t *rpc) LASSERT(srpc_data.rpc_state == SRPC_STATE_RUNNING); CDEBUG(D_NET, "Posting RPC: peer %s, service %d, timeout %d\n", - libcfs_id2str(rpc->crpc_dest), rpc->crpc_service, - rpc->crpc_timeout); + libcfs_id2str(rpc->crpc_dest), rpc->crpc_service, + rpc->crpc_timeout); srpc_add_client_rpc_timer(rpc); swi_schedule_workitem(&rpc->crpc_wi); - return; } int @@ -1358,15 +1371,17 @@ srpc_send_reply(struct srpc_server_rpc *rpc) __u64 rpyid; int rc; - LASSERT(buffer != NULL); + LASSERT(buffer); rpyid = buffer->buf_msg.msg_body.reqst.rpyid; spin_lock(&scd->scd_lock); if (!sv->sv_shuttingdown && !srpc_serv_is_framework(sv)) { - /* Repost buffer before replying since test client - * might send me 
another RPC once it gets the reply */ - if (srpc_service_post_buffer(scd, buffer) != 0) + /* + * Repost buffer before replying since test client + * might send me another RPC once it gets the reply + */ + if (srpc_service_post_buffer(scd, buffer)) CWARN("Failed to repost %s buffer\n", sv->sv_name); rpc->srpc_reqstbuf = NULL; } @@ -1374,18 +1389,18 @@ srpc_send_reply(struct srpc_server_rpc *rpc) spin_unlock(&scd->scd_lock); ev->ev_fired = 0; - ev->ev_data = rpc; - ev->ev_type = SRPC_REPLY_SENT; + ev->ev_data = rpc; + ev->ev_type = SRPC_REPLY_SENT; - msg->msg_magic = SRPC_MSG_MAGIC; + msg->msg_magic = SRPC_MSG_MAGIC; msg->msg_version = SRPC_MSG_VERSION; - msg->msg_type = srpc_service2reply(sv->sv_id); + msg->msg_type = srpc_service2reply(sv->sv_id); rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, rpyid, msg, sizeof(*msg), LNET_MD_OP_PUT, rpc->srpc_peer, rpc->srpc_self, &rpc->srpc_replymdh, ev); - if (rc != 0) + if (rc) ev->ev_fired = 1; /* no more event expected */ return rc; } @@ -1405,10 +1420,17 @@ srpc_lnet_ev_handler(lnet_event_t *ev) LASSERT(!in_interrupt()); - if (ev->status != 0) { + if (ev->status) { + __u32 errors; + spin_lock(&srpc_data.rpc_glock); - srpc_data.rpc_counters.errors++; + if (ev->status != -ECANCELED) /* cancellation is not error */ + srpc_data.rpc_counters.errors++; + errors = srpc_data.rpc_counters.errors; spin_unlock(&srpc_data.rpc_glock); + + CNETERR("LNet event status %d type %d, RPC errors %u\n", + ev->status, ev->type, errors); } rpcev->ev_lnet = ev->type; @@ -1419,7 +1441,7 @@ srpc_lnet_ev_handler(lnet_event_t *ev) rpcev->ev_status, rpcev->ev_type, rpcev->ev_lnet); LBUG(); case SRPC_REQUEST_SENT: - if (ev->status == 0 && ev->type != LNET_EVENT_UNLINK) { + if (!ev->status && ev->type != LNET_EVENT_UNLINK) { spin_lock(&srpc_data.rpc_glock); srpc_data.rpc_counters.rpcs_sent++; spin_unlock(&srpc_data.rpc_glock); @@ -1441,8 +1463,8 @@ srpc_lnet_ev_handler(lnet_event_t *ev) spin_lock(&crpc->crpc_lock); - LASSERT(rpcev->ev_fired == 0); - rpcev->ev_fired = 1; + LASSERT(!rpcev->ev_fired); + rpcev->ev_fired = 1; rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ? -EINTR : ev->status; swi_schedule_workitem(&crpc->crpc_wi); @@ -1460,9 +1482,9 @@ srpc_lnet_ev_handler(lnet_event_t *ev) LASSERT(ev->unlinked); LASSERT(ev->type == LNET_EVENT_PUT || - ev->type == LNET_EVENT_UNLINK); + ev->type == LNET_EVENT_UNLINK); LASSERT(ev->type != LNET_EVENT_UNLINK || - sv->sv_shuttingdown); + sv->sv_shuttingdown); buffer = container_of(ev->md.start, srpc_buffer_t, buf_msg); buffer->buf_peer = ev->initiator; @@ -1472,21 +1494,23 @@ srpc_lnet_ev_handler(lnet_event_t *ev) scd->scd_buf_nposted--; if (sv->sv_shuttingdown) { - /* Leave buffer on scd->scd_buf_nposted since - * srpc_finish_service needs to traverse it. */ + /* + * Leave buffer on scd->scd_buf_nposted since + * srpc_finish_service needs to traverse it. 
+ */ spin_unlock(&scd->scd_lock); break; } - if (scd->scd_buf_err_stamp != 0 && + if (scd->scd_buf_err_stamp && scd->scd_buf_err_stamp < ktime_get_real_seconds()) { /* re-enable adding buffer */ scd->scd_buf_err_stamp = 0; scd->scd_buf_err = 0; } - if (scd->scd_buf_err == 0 && /* adding buffer is enabled */ - scd->scd_buf_adjust == 0 && + if (!scd->scd_buf_err && /* adding buffer is enabled */ + !scd->scd_buf_adjust && scd->scd_buf_nposted < scd->scd_buf_low) { scd->scd_buf_adjust = max(scd->scd_buf_total / 2, SFW_TEST_WI_MIN); @@ -1497,7 +1521,7 @@ srpc_lnet_ev_handler(lnet_event_t *ev) msg = &buffer->buf_msg; type = srpc_service2request(sv->sv_id); - if (ev->status != 0 || ev->mlength != sizeof(*msg) || + if (ev->status || ev->mlength != sizeof(*msg) || (msg->msg_type != type && msg->msg_type != __swab32(type)) || (msg->msg_magic != SRPC_MSG_MAGIC && @@ -1507,25 +1531,27 @@ srpc_lnet_ev_handler(lnet_event_t *ev) ev->status, ev->mlength, msg->msg_type, msg->msg_magic); - /* NB can't call srpc_service_recycle_buffer here since + /* + * NB can't call srpc_service_recycle_buffer here since * it may call LNetM[DE]Attach. The invalid magic tells - * srpc_handle_rpc to drop this RPC */ + * srpc_handle_rpc to drop this RPC + */ msg->msg_magic = 0; } if (!list_empty(&scd->scd_rpc_free)) { srpc = list_entry(scd->scd_rpc_free.next, - struct srpc_server_rpc, - srpc_list); + struct srpc_server_rpc, + srpc_list); list_del(&srpc->srpc_list); srpc_init_server_rpc(srpc, scd, buffer); list_add_tail(&srpc->srpc_list, - &scd->scd_rpc_active); + &scd->scd_rpc_active); swi_schedule_workitem(&srpc->srpc_wi); } else { list_add_tail(&buffer->buf_list, - &scd->scd_buf_blocked); + &scd->scd_buf_blocked); } spin_unlock(&scd->scd_lock); @@ -1537,14 +1563,14 @@ srpc_lnet_ev_handler(lnet_event_t *ev) case SRPC_BULK_GET_RPLD: LASSERT(ev->type == LNET_EVENT_SEND || - ev->type == LNET_EVENT_REPLY || - ev->type == LNET_EVENT_UNLINK); + ev->type == LNET_EVENT_REPLY || + ev->type == LNET_EVENT_UNLINK); if (!ev->unlinked) break; /* wait for final event */ case SRPC_BULK_PUT_SENT: - if (ev->status == 0 && ev->type != LNET_EVENT_UNLINK) { + if (!ev->status && ev->type != LNET_EVENT_UNLINK) { spin_lock(&srpc_data.rpc_glock); if (rpcev->ev_type == SRPC_BULK_GET_RPLD) @@ -1556,13 +1582,13 @@ srpc_lnet_ev_handler(lnet_event_t *ev) } case SRPC_REPLY_SENT: srpc = rpcev->ev_data; - scd = srpc->srpc_scd; + scd = srpc->srpc_scd; LASSERT(rpcev == &srpc->srpc_ev); spin_lock(&scd->scd_lock); - rpcev->ev_fired = 1; + rpcev->ev_fired = 1; rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ? 
-EINTR : ev->status; swi_schedule_workitem(&srpc->srpc_wi); @@ -1587,7 +1613,7 @@ srpc_startup(void) srpc_data.rpc_state = SRPC_STATE_NONE; - rc = LNetNIInit(LUSTRE_SRV_LNET_PID); + rc = LNetNIInit(LNET_PID_LUSTRE); if (rc < 0) { CERROR("LNetNIInit() has failed: %d\n", rc); return rc; @@ -1597,22 +1623,22 @@ srpc_startup(void) LNetInvalidateHandle(&srpc_data.rpc_lnet_eq); rc = LNetEQAlloc(0, srpc_lnet_ev_handler, &srpc_data.rpc_lnet_eq); - if (rc != 0) { + if (rc) { CERROR("LNetEQAlloc() has failed: %d\n", rc); goto bail; } rc = LNetSetLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL); - LASSERT(rc == 0); + LASSERT(!rc); rc = LNetSetLazyPortal(SRPC_REQUEST_PORTAL); - LASSERT(rc == 0); + LASSERT(!rc); srpc_data.rpc_state = SRPC_STATE_EQ_INIT; rc = stt_startup(); bail: - if (rc != 0) + if (rc) srpc_shutdown(); else srpc_data.rpc_state = SRPC_STATE_RUNNING; @@ -1639,9 +1665,8 @@ srpc_shutdown(void) for (i = 0; i <= SRPC_SERVICE_MAX_ID; i++) { srpc_service_t *sv = srpc_data.rpc_services[i]; - LASSERTF(sv == NULL, - "service not empty: id %d, name %s\n", - i, sv->sv_name); + LASSERTF(!sv, "service not empty: id %d, name %s\n", + i, sv->sv_name); } spin_unlock(&srpc_data.rpc_glock); @@ -1651,13 +1676,11 @@ srpc_shutdown(void) case SRPC_STATE_EQ_INIT: rc = LNetClearLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL); rc = LNetClearLazyPortal(SRPC_REQUEST_PORTAL); - LASSERT(rc == 0); + LASSERT(!rc); rc = LNetEQFree(srpc_data.rpc_lnet_eq); - LASSERT(rc == 0); /* the EQ should have no user by now */ + LASSERT(!rc); /* the EQ should have no user by now */ case SRPC_STATE_NI_INIT: LNetNIFini(); } - - return; } diff --git a/drivers/staging/lustre/lnet/selftest/rpc.h b/drivers/staging/lustre/lnet/selftest/rpc.h index 6b4a32a90..a79c315f2 100644 --- a/drivers/staging/lustre/lnet/selftest/rpc.h +++ b/drivers/staging/lustre/lnet/selftest/rpc.h @@ -45,24 +45,24 @@ * XXX: *REPLY == *REQST + 1 */ typedef enum { - SRPC_MSG_MKSN_REQST = 0, - SRPC_MSG_MKSN_REPLY = 1, - SRPC_MSG_RMSN_REQST = 2, - SRPC_MSG_RMSN_REPLY = 3, - SRPC_MSG_BATCH_REQST = 4, - SRPC_MSG_BATCH_REPLY = 5, - SRPC_MSG_STAT_REQST = 6, - SRPC_MSG_STAT_REPLY = 7, - SRPC_MSG_TEST_REQST = 8, - SRPC_MSG_TEST_REPLY = 9, - SRPC_MSG_DEBUG_REQST = 10, - SRPC_MSG_DEBUG_REPLY = 11, - SRPC_MSG_BRW_REQST = 12, - SRPC_MSG_BRW_REPLY = 13, - SRPC_MSG_PING_REQST = 14, - SRPC_MSG_PING_REPLY = 15, - SRPC_MSG_JOIN_REQST = 16, - SRPC_MSG_JOIN_REPLY = 17, + SRPC_MSG_MKSN_REQST = 0, + SRPC_MSG_MKSN_REPLY = 1, + SRPC_MSG_RMSN_REQST = 2, + SRPC_MSG_RMSN_REPLY = 3, + SRPC_MSG_BATCH_REQST = 4, + SRPC_MSG_BATCH_REPLY = 5, + SRPC_MSG_STAT_REQST = 6, + SRPC_MSG_STAT_REPLY = 7, + SRPC_MSG_TEST_REQST = 8, + SRPC_MSG_TEST_REPLY = 9, + SRPC_MSG_DEBUG_REQST = 10, + SRPC_MSG_DEBUG_REPLY = 11, + SRPC_MSG_BRW_REQST = 12, + SRPC_MSG_BRW_REPLY = 13, + SRPC_MSG_PING_REQST = 14, + SRPC_MSG_PING_REPLY = 15, + SRPC_MSG_JOIN_REQST = 16, + SRPC_MSG_JOIN_REPLY = 17, } srpc_msg_type_t; /* CAVEAT EMPTOR: @@ -78,127 +78,127 @@ typedef struct { } WIRE_ATTR srpc_generic_reqst_t; typedef struct { - __u32 status; - lst_sid_t sid; + __u32 status; + lst_sid_t sid; } WIRE_ATTR srpc_generic_reply_t; /* FRAMEWORK RPCs */ typedef struct { - __u64 mksn_rpyid; /* reply buffer matchbits */ - lst_sid_t mksn_sid; /* session id */ - __u32 mksn_force; /* use brute force */ + __u64 mksn_rpyid; /* reply buffer matchbits */ + lst_sid_t mksn_sid; /* session id */ + __u32 mksn_force; /* use brute force */ char mksn_name[LST_NAME_SIZE]; } WIRE_ATTR srpc_mksn_reqst_t; /* make session request */ typedef struct { - __u32 mksn_status; /* 
session status */ - lst_sid_t mksn_sid; /* session id */ - __u32 mksn_timeout; /* session timeout */ - char mksn_name[LST_NAME_SIZE]; + __u32 mksn_status; /* session status */ + lst_sid_t mksn_sid; /* session id */ + __u32 mksn_timeout; /* session timeout */ + char mksn_name[LST_NAME_SIZE]; } WIRE_ATTR srpc_mksn_reply_t; /* make session reply */ typedef struct { - __u64 rmsn_rpyid; /* reply buffer matchbits */ - lst_sid_t rmsn_sid; /* session id */ + __u64 rmsn_rpyid; /* reply buffer matchbits */ + lst_sid_t rmsn_sid; /* session id */ } WIRE_ATTR srpc_rmsn_reqst_t; /* remove session request */ typedef struct { - __u32 rmsn_status; - lst_sid_t rmsn_sid; /* session id */ + __u32 rmsn_status; + lst_sid_t rmsn_sid; /* session id */ } WIRE_ATTR srpc_rmsn_reply_t; /* remove session reply */ typedef struct { - __u64 join_rpyid; /* reply buffer matchbits */ - lst_sid_t join_sid; /* session id to join */ - char join_group[LST_NAME_SIZE]; /* group name */ + __u64 join_rpyid; /* reply buffer matchbits */ + lst_sid_t join_sid; /* session id to join */ + char join_group[LST_NAME_SIZE]; /* group name */ } WIRE_ATTR srpc_join_reqst_t; typedef struct { - __u32 join_status; /* returned status */ - lst_sid_t join_sid; /* session id */ - __u32 join_timeout; /* # seconds' inactivity to + __u32 join_status; /* returned status */ + lst_sid_t join_sid; /* session id */ + __u32 join_timeout; /* # seconds' inactivity to * expire */ - char join_session[LST_NAME_SIZE]; /* session name */ + char join_session[LST_NAME_SIZE]; /* session name */ } WIRE_ATTR srpc_join_reply_t; typedef struct { - __u64 dbg_rpyid; /* reply buffer matchbits */ - lst_sid_t dbg_sid; /* session id */ - __u32 dbg_flags; /* bitmap of debug */ + __u64 dbg_rpyid; /* reply buffer matchbits */ + lst_sid_t dbg_sid; /* session id */ + __u32 dbg_flags; /* bitmap of debug */ } WIRE_ATTR srpc_debug_reqst_t; typedef struct { - __u32 dbg_status; /* returned code */ - lst_sid_t dbg_sid; /* session id */ - __u32 dbg_timeout; /* session timeout */ - __u32 dbg_nbatch; /* # of batches in the node */ - char dbg_name[LST_NAME_SIZE]; /* session name */ + __u32 dbg_status; /* returned code */ + lst_sid_t dbg_sid; /* session id */ + __u32 dbg_timeout; /* session timeout */ + __u32 dbg_nbatch; /* # of batches in the node */ + char dbg_name[LST_NAME_SIZE]; /* session name */ } WIRE_ATTR srpc_debug_reply_t; -#define SRPC_BATCH_OPC_RUN 1 -#define SRPC_BATCH_OPC_STOP 2 -#define SRPC_BATCH_OPC_QUERY 3 +#define SRPC_BATCH_OPC_RUN 1 +#define SRPC_BATCH_OPC_STOP 2 +#define SRPC_BATCH_OPC_QUERY 3 typedef struct { - __u64 bar_rpyid; /* reply buffer matchbits */ - lst_sid_t bar_sid; /* session id */ - lst_bid_t bar_bid; /* batch id */ - __u32 bar_opc; /* create/start/stop batch */ - __u32 bar_testidx; /* index of test */ - __u32 bar_arg; /* parameters */ + __u64 bar_rpyid; /* reply buffer matchbits */ + lst_sid_t bar_sid; /* session id */ + lst_bid_t bar_bid; /* batch id */ + __u32 bar_opc; /* create/start/stop batch */ + __u32 bar_testidx; /* index of test */ + __u32 bar_arg; /* parameters */ } WIRE_ATTR srpc_batch_reqst_t; typedef struct { - __u32 bar_status; /* status of request */ - lst_sid_t bar_sid; /* session id */ - __u32 bar_active; /* # of active tests in batch/test */ - __u32 bar_time; /* remained time */ + __u32 bar_status; /* status of request */ + lst_sid_t bar_sid; /* session id */ + __u32 bar_active; /* # of active tests in batch/test */ + __u32 bar_time; /* remained time */ } WIRE_ATTR srpc_batch_reply_t; typedef struct { - __u64 str_rpyid; /* reply buffer 
matchbits */ - lst_sid_t str_sid; /* session id */ - __u32 str_type; /* type of stat */ + __u64 str_rpyid; /* reply buffer matchbits */ + lst_sid_t str_sid; /* session id */ + __u32 str_type; /* type of stat */ } WIRE_ATTR srpc_stat_reqst_t; typedef struct { - __u32 str_status; - lst_sid_t str_sid; - sfw_counters_t str_fw; + __u32 str_status; + lst_sid_t str_sid; + sfw_counters_t str_fw; srpc_counters_t str_rpc; lnet_counters_t str_lnet; } WIRE_ATTR srpc_stat_reply_t; typedef struct { - __u32 blk_opc; /* bulk operation code */ - __u32 blk_npg; /* # of pages */ - __u32 blk_flags; /* reserved flags */ + __u32 blk_opc; /* bulk operation code */ + __u32 blk_npg; /* # of pages */ + __u32 blk_flags; /* reserved flags */ } WIRE_ATTR test_bulk_req_t; typedef struct { - __u16 blk_opc; /* bulk operation code */ - __u16 blk_flags; /* data check flags */ - __u32 blk_len; /* data length */ - __u32 blk_offset; /* reserved: offset */ + __u16 blk_opc; /* bulk operation code */ + __u16 blk_flags; /* data check flags */ + __u32 blk_len; /* data length */ + __u32 blk_offset; /* reserved: offset */ } WIRE_ATTR test_bulk_req_v1_t; typedef struct { - __u32 png_size; /* size of ping message */ - __u32 png_flags; /* reserved flags */ + __u32 png_size; /* size of ping message */ + __u32 png_flags; /* reserved flags */ } WIRE_ATTR test_ping_req_t; typedef struct { - __u64 tsr_rpyid; /* reply buffer matchbits */ - __u64 tsr_bulkid; /* bulk buffer matchbits */ + __u64 tsr_rpyid; /* reply buffer matchbits */ + __u64 tsr_bulkid; /* bulk buffer matchbits */ lst_sid_t tsr_sid; /* session id */ lst_bid_t tsr_bid; /* batch id */ - __u32 tsr_service; /* test type: bulk|ping|... */ - __u32 tsr_loop; /* test client loop count or + __u32 tsr_service; /* test type: bulk|ping|... */ + __u32 tsr_loop; /* test client loop count or * # server buffers needed */ - __u32 tsr_concur; /* concurrency of test */ - __u8 tsr_is_client; /* is test client or not */ + __u32 tsr_concur; /* concurrency of test */ + __u8 tsr_is_client; /* is test client or not */ __u8 tsr_stop_onerr; /* stop on error */ - __u32 tsr_ndest; /* # of dest nodes */ + __u32 tsr_ndest; /* # of dest nodes */ union { test_ping_req_t ping; @@ -208,7 +208,7 @@ typedef struct { } WIRE_ATTR srpc_test_reqst_t; typedef struct { - __u32 tsr_status; /* returned code */ + __u32 tsr_status; /* returned code */ lst_sid_t tsr_sid; } WIRE_ATTR srpc_test_reply_t; @@ -228,19 +228,19 @@ typedef struct { } WIRE_ATTR srpc_ping_reply_t; typedef struct { - __u64 brw_rpyid; /* reply buffer matchbits */ - __u64 brw_bulkid; /* bulk buffer matchbits */ - __u32 brw_rw; /* read or write */ - __u32 brw_len; /* bulk data len */ - __u32 brw_flags; /* bulk data patterns */ + __u64 brw_rpyid; /* reply buffer matchbits */ + __u64 brw_bulkid; /* bulk buffer matchbits */ + __u32 brw_rw; /* read or write */ + __u32 brw_len; /* bulk data len */ + __u32 brw_flags; /* bulk data patterns */ } WIRE_ATTR srpc_brw_reqst_t; /* bulk r/w request */ typedef struct { __u32 brw_status; } WIRE_ATTR srpc_brw_reply_t; /* bulk r/w reply */ -#define SRPC_MSG_MAGIC 0xeeb0f00d -#define SRPC_MSG_VERSION 1 +#define SRPC_MSG_MAGIC 0xeeb0f00d +#define SRPC_MSG_VERSION 1 typedef struct srpc_msg { __u32 msg_magic; /* magic number */ @@ -281,8 +281,10 @@ srpc_unpack_msg_hdr(srpc_msg_t *msg) if (msg->msg_magic == SRPC_MSG_MAGIC) return; /* no flipping needed */ - /* We do not swap the magic number here as it is needed to - determine whether the body needs to be swapped. 
*/ + /* + * We do not swap the magic number here as it is needed to + * determine whether the body needs to be swapped. + */ /* __swab32s(&msg->msg_magic); */ __swab32s(&msg->msg_type); __swab32s(&msg->msg_version); diff --git a/drivers/staging/lustre/lnet/selftest/selftest.h b/drivers/staging/lustre/lnet/selftest/selftest.h index 870498339..e689ca184 100644 --- a/drivers/staging/lustre/lnet/selftest/selftest.h +++ b/drivers/staging/lustre/lnet/selftest/selftest.h @@ -56,14 +56,14 @@ #define MADE_WITHOUT_COMPROMISE #endif -#define SWI_STATE_NEWBORN 0 -#define SWI_STATE_REPLY_SUBMITTED 1 -#define SWI_STATE_REPLY_SENT 2 -#define SWI_STATE_REQUEST_SUBMITTED 3 -#define SWI_STATE_REQUEST_SENT 4 -#define SWI_STATE_REPLY_RECEIVED 5 -#define SWI_STATE_BULK_STARTED 6 -#define SWI_STATE_DONE 10 +#define SWI_STATE_NEWBORN 0 +#define SWI_STATE_REPLY_SUBMITTED 1 +#define SWI_STATE_REPLY_SENT 2 +#define SWI_STATE_REQUEST_SUBMITTED 3 +#define SWI_STATE_REQUEST_SENT 4 +#define SWI_STATE_REPLY_RECEIVED 5 +#define SWI_STATE_BULK_STARTED 6 +#define SWI_STATE_DONE 10 /* forward refs */ struct srpc_service; @@ -74,31 +74,31 @@ struct sfw_test_instance; /* services below SRPC_FRAMEWORK_SERVICE_MAX_ID are framework * services, e.g. create/modify session. */ -#define SRPC_SERVICE_DEBUG 0 -#define SRPC_SERVICE_MAKE_SESSION 1 -#define SRPC_SERVICE_REMOVE_SESSION 2 -#define SRPC_SERVICE_BATCH 3 -#define SRPC_SERVICE_TEST 4 -#define SRPC_SERVICE_QUERY_STAT 5 -#define SRPC_SERVICE_JOIN 6 -#define SRPC_FRAMEWORK_SERVICE_MAX_ID 10 +#define SRPC_SERVICE_DEBUG 0 +#define SRPC_SERVICE_MAKE_SESSION 1 +#define SRPC_SERVICE_REMOVE_SESSION 2 +#define SRPC_SERVICE_BATCH 3 +#define SRPC_SERVICE_TEST 4 +#define SRPC_SERVICE_QUERY_STAT 5 +#define SRPC_SERVICE_JOIN 6 +#define SRPC_FRAMEWORK_SERVICE_MAX_ID 10 /* other services start from SRPC_FRAMEWORK_SERVICE_MAX_ID+1 */ -#define SRPC_SERVICE_BRW 11 -#define SRPC_SERVICE_PING 12 -#define SRPC_SERVICE_MAX_ID 12 +#define SRPC_SERVICE_BRW 11 +#define SRPC_SERVICE_PING 12 +#define SRPC_SERVICE_MAX_ID 12 -#define SRPC_REQUEST_PORTAL 50 +#define SRPC_REQUEST_PORTAL 50 /* a lazy portal for framework RPC requests */ -#define SRPC_FRAMEWORK_REQUEST_PORTAL 51 +#define SRPC_FRAMEWORK_REQUEST_PORTAL 51 /* all reply/bulk RDMAs go to this portal */ -#define SRPC_RDMA_PORTAL 52 +#define SRPC_RDMA_PORTAL 52 static inline srpc_msg_type_t -srpc_service2request (int service) +srpc_service2request(int service) { switch (service) { default: - LBUG (); + LBUG(); case SRPC_SERVICE_DEBUG: return SRPC_MSG_DEBUG_REQST; @@ -129,7 +129,7 @@ srpc_service2request (int service) } static inline srpc_msg_type_t -srpc_service2reply (int service) +srpc_service2reply(int service) { return srpc_service2request(service) + 1; } @@ -149,25 +149,25 @@ typedef enum { typedef struct { srpc_event_type_t ev_type; /* what's up */ lnet_event_kind_t ev_lnet; /* LNet event type */ - int ev_fired; /* LNet event fired? */ - int ev_status; /* LNet event status */ - void *ev_data; /* owning server/client RPC */ + int ev_fired; /* LNet event fired? 
*/ + int ev_status; /* LNet event status */ + void *ev_data; /* owning server/client RPC */ } srpc_event_t; typedef struct { - int bk_len; /* len of bulk data */ + int bk_len; /* len of bulk data */ lnet_handle_md_t bk_mdh; - int bk_sink; /* sink/source */ - int bk_niov; /* # iov in bk_iovs */ - lnet_kiov_t bk_iovs[0]; + int bk_sink; /* sink/source */ + int bk_niov; /* # iov in bk_iovs */ + lnet_kiov_t bk_iovs[0]; } srpc_bulk_t; /* bulk descriptor */ /* message buffer descriptor */ typedef struct srpc_buffer { struct list_head buf_list; /* chain on srpc_service::*_msgq */ - srpc_msg_t buf_msg; + srpc_msg_t buf_msg; lnet_handle_md_t buf_mdh; - lnet_nid_t buf_self; + lnet_nid_t buf_self; lnet_process_id_t buf_peer; } srpc_buffer_t; @@ -176,9 +176,9 @@ typedef int (*swi_action_t) (struct swi_workitem *); typedef struct swi_workitem { struct cfs_wi_sched *swi_sched; - cfs_workitem_t swi_workitem; - swi_action_t swi_action; - int swi_state; + cfs_workitem_t swi_workitem; + swi_action_t swi_action; + int swi_state; } swi_workitem_t; /* server-side state of a RPC */ @@ -186,78 +186,78 @@ struct srpc_server_rpc { /* chain on srpc_service::*_rpcq */ struct list_head srpc_list; struct srpc_service_cd *srpc_scd; - swi_workitem_t srpc_wi; - srpc_event_t srpc_ev; /* bulk/reply event */ - lnet_nid_t srpc_self; + swi_workitem_t srpc_wi; + srpc_event_t srpc_ev; /* bulk/reply event */ + lnet_nid_t srpc_self; lnet_process_id_t srpc_peer; - srpc_msg_t srpc_replymsg; + srpc_msg_t srpc_replymsg; lnet_handle_md_t srpc_replymdh; - srpc_buffer_t *srpc_reqstbuf; - srpc_bulk_t *srpc_bulk; + srpc_buffer_t *srpc_reqstbuf; + srpc_bulk_t *srpc_bulk; - unsigned int srpc_aborted; /* being given up */ - int srpc_status; - void (*srpc_done)(struct srpc_server_rpc *); + unsigned int srpc_aborted; /* being given up */ + int srpc_status; + void (*srpc_done)(struct srpc_server_rpc *); }; /* client-side state of a RPC */ typedef struct srpc_client_rpc { - struct list_head crpc_list; /* chain on user's lists */ - spinlock_t crpc_lock; /* serialize */ - int crpc_service; - atomic_t crpc_refcount; - int crpc_timeout; /* # seconds to wait for reply */ - stt_timer_t crpc_timer; - swi_workitem_t crpc_wi; + struct list_head crpc_list; /* chain on user's lists */ + spinlock_t crpc_lock; /* serialize */ + int crpc_service; + atomic_t crpc_refcount; + int crpc_timeout; /* # seconds to wait for reply */ + struct stt_timer crpc_timer; + swi_workitem_t crpc_wi; lnet_process_id_t crpc_dest; - void (*crpc_done)(struct srpc_client_rpc *); - void (*crpc_fini)(struct srpc_client_rpc *); - int crpc_status; /* completion status */ - void *crpc_priv; /* caller data */ + void (*crpc_done)(struct srpc_client_rpc *); + void (*crpc_fini)(struct srpc_client_rpc *); + int crpc_status; /* completion status */ + void *crpc_priv; /* caller data */ /* state flags */ - unsigned int crpc_aborted:1; /* being given up */ - unsigned int crpc_closed:1; /* completed */ + unsigned int crpc_aborted:1; /* being given up */ + unsigned int crpc_closed:1; /* completed */ /* RPC events */ - srpc_event_t crpc_bulkev; /* bulk event */ - srpc_event_t crpc_reqstev; /* request event */ - srpc_event_t crpc_replyev; /* reply event */ + srpc_event_t crpc_bulkev; /* bulk event */ + srpc_event_t crpc_reqstev; /* request event */ + srpc_event_t crpc_replyev; /* reply event */ /* bulk, request(reqst), and reply exchanged on wire */ - srpc_msg_t crpc_reqstmsg; - srpc_msg_t crpc_replymsg; + srpc_msg_t crpc_reqstmsg; + srpc_msg_t crpc_replymsg; lnet_handle_md_t crpc_reqstmdh; 
lnet_handle_md_t crpc_replymdh; - srpc_bulk_t crpc_bulk; + srpc_bulk_t crpc_bulk; } srpc_client_rpc_t; -#define srpc_client_rpc_size(rpc) \ +#define srpc_client_rpc_size(rpc) \ offsetof(srpc_client_rpc_t, crpc_bulk.bk_iovs[(rpc)->crpc_bulk.bk_niov]) -#define srpc_client_rpc_addref(rpc) \ -do { \ - CDEBUG(D_NET, "RPC[%p] -> %s (%d)++\n", \ - (rpc), libcfs_id2str((rpc)->crpc_dest), \ - atomic_read(&(rpc)->crpc_refcount)); \ - LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \ - atomic_inc(&(rpc)->crpc_refcount); \ +#define srpc_client_rpc_addref(rpc) \ +do { \ + CDEBUG(D_NET, "RPC[%p] -> %s (%d)++\n", \ + (rpc), libcfs_id2str((rpc)->crpc_dest), \ + atomic_read(&(rpc)->crpc_refcount)); \ + LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \ + atomic_inc(&(rpc)->crpc_refcount); \ } while (0) -#define srpc_client_rpc_decref(rpc) \ -do { \ - CDEBUG(D_NET, "RPC[%p] -> %s (%d)--\n", \ - (rpc), libcfs_id2str((rpc)->crpc_dest), \ - atomic_read(&(rpc)->crpc_refcount)); \ - LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \ - if (atomic_dec_and_test(&(rpc)->crpc_refcount)) \ - srpc_destroy_client_rpc(rpc); \ +#define srpc_client_rpc_decref(rpc) \ +do { \ + CDEBUG(D_NET, "RPC[%p] -> %s (%d)--\n", \ + (rpc), libcfs_id2str((rpc)->crpc_dest), \ + atomic_read(&(rpc)->crpc_refcount)); \ + LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \ + if (atomic_dec_and_test(&(rpc)->crpc_refcount)) \ + srpc_destroy_client_rpc(rpc); \ } while (0) -#define srpc_event_pending(rpc) ((rpc)->crpc_bulkev.ev_fired == 0 || \ - (rpc)->crpc_reqstev.ev_fired == 0 || \ - (rpc)->crpc_replyev.ev_fired == 0) +#define srpc_event_pending(rpc) (!(rpc)->crpc_bulkev.ev_fired || \ + !(rpc)->crpc_reqstev.ev_fired || \ + !(rpc)->crpc_replyev.ev_fired) /* CPU partition data of srpc service */ struct srpc_service_cd { @@ -268,9 +268,9 @@ struct srpc_service_cd { /** event buffer */ srpc_event_t scd_ev; /** free RPC descriptors */ - struct list_head scd_rpc_free; + struct list_head scd_rpc_free; /** in-flight RPCs */ - struct list_head scd_rpc_active; + struct list_head scd_rpc_active; /** workitem for posting buffer */ swi_workitem_t scd_buf_wi; /** CPT id */ @@ -278,7 +278,7 @@ struct srpc_service_cd { /** error code for scd_buf_wi */ int scd_buf_err; /** timestamp for scd_buf_err */ - time64_t scd_buf_err_stamp; + time64_t scd_buf_err_stamp; /** total # request buffers */ int scd_buf_total; /** # posted request buffers */ @@ -290,16 +290,16 @@ struct srpc_service_cd { /** increase/decrease some buffers */ int scd_buf_adjust; /** posted message buffers */ - struct list_head scd_buf_posted; + struct list_head scd_buf_posted; /** blocked for RPC descriptor */ - struct list_head scd_buf_blocked; + struct list_head scd_buf_blocked; }; /* number of server workitems (mini-thread) for testing service */ #define SFW_TEST_WI_MIN 256 #define SFW_TEST_WI_MAX 2048 /* extra buffers for tolerating buggy peers, or unbalanced number - * of peers between partitions */ + * of peers between partitions */ #define SFW_TEST_WI_EXTRA 64 /* number of server workitems (mini-thread) for framework service */ @@ -324,29 +324,29 @@ typedef struct srpc_service { typedef struct { struct list_head sn_list; /* chain on fw_zombie_sessions */ - lst_sid_t sn_id; /* unique identifier */ - unsigned int sn_timeout; /* # seconds' inactivity to expire */ - int sn_timer_active; - unsigned int sn_features; - stt_timer_t sn_timer; + lst_sid_t sn_id; /* unique identifier */ + unsigned int sn_timeout; /* # seconds' inactivity to expire */ + int sn_timer_active; + unsigned int sn_features; 
+ struct stt_timer sn_timer; struct list_head sn_batches; /* list of batches */ - char sn_name[LST_NAME_SIZE]; - atomic_t sn_refcount; - atomic_t sn_brw_errors; - atomic_t sn_ping_errors; - unsigned long sn_started; + char sn_name[LST_NAME_SIZE]; + atomic_t sn_refcount; + atomic_t sn_brw_errors; + atomic_t sn_ping_errors; + unsigned long sn_started; } sfw_session_t; #define sfw_sid_equal(sid0, sid1) ((sid0).ses_nid == (sid1).ses_nid && \ (sid0).ses_stamp == (sid1).ses_stamp) typedef struct { - struct list_head bat_list; /* chain on sn_batches */ - lst_bid_t bat_id; /* batch id */ - int bat_error; /* error code of batch */ - sfw_session_t *bat_session; /* batch's session */ - atomic_t bat_nactive; /* # of active tests */ - struct list_head bat_tests; /* test instances */ + struct list_head bat_list; /* chain on sn_batches */ + lst_bid_t bat_id; /* batch id */ + int bat_error; /* error code of batch */ + sfw_session_t *bat_session; /* batch's session */ + atomic_t bat_nactive; /* # of active tests */ + struct list_head bat_tests; /* test instances */ } sfw_batch_t; typedef struct { @@ -356,32 +356,32 @@ typedef struct { * client */ int (*tso_prep_rpc)(struct sfw_test_unit *tsu, lnet_process_id_t dest, - srpc_client_rpc_t **rpc); /* prep a tests rpc */ + srpc_client_rpc_t **rpc); /* prep a tests rpc */ void (*tso_done_rpc)(struct sfw_test_unit *tsu, - srpc_client_rpc_t *rpc); /* done a test rpc */ + srpc_client_rpc_t *rpc); /* done a test rpc */ } sfw_test_client_ops_t; typedef struct sfw_test_instance { - struct list_head tsi_list; /* chain on batch */ - int tsi_service; /* test type */ - sfw_batch_t *tsi_batch; /* batch */ - sfw_test_client_ops_t *tsi_ops; /* test client operation + struct list_head tsi_list; /* chain on batch */ + int tsi_service; /* test type */ + sfw_batch_t *tsi_batch; /* batch */ + sfw_test_client_ops_t *tsi_ops; /* test client operation */ /* public parameter for all test units */ - unsigned int tsi_is_client:1; /* is test client */ - unsigned int tsi_stoptsu_onerr:1; /* stop tsu on error */ - int tsi_concur; /* concurrency */ - int tsi_loop; /* loop count */ + unsigned int tsi_is_client:1; /* is test client */ + unsigned int tsi_stoptsu_onerr:1; /* stop tsu on error */ + int tsi_concur; /* concurrency */ + int tsi_loop; /* loop count */ /* status of test instance */ - spinlock_t tsi_lock; /* serialize */ - unsigned int tsi_stopping:1; /* test is stopping */ - atomic_t tsi_nactive; /* # of active test + spinlock_t tsi_lock; /* serialize */ + unsigned int tsi_stopping:1; /* test is stopping */ + atomic_t tsi_nactive; /* # of active test * unit */ - struct list_head tsi_units; /* test units */ - struct list_head tsi_free_rpcs; /* free rpcs */ - struct list_head tsi_active_rpcs; /* active rpcs */ + struct list_head tsi_units; /* test units */ + struct list_head tsi_free_rpcs; /* free rpcs */ + struct list_head tsi_active_rpcs; /* active rpcs */ union { test_ping_req_t ping; /* ping parameter */ @@ -390,32 +390,32 @@ typedef struct sfw_test_instance { } tsi_u; } sfw_test_instance_t; -/* XXX: trailing (PAGE_CACHE_SIZE % sizeof(lnet_process_id_t)) bytes at - * the end of pages are not used */ -#define SFW_MAX_CONCUR LST_MAX_CONCUR -#define SFW_ID_PER_PAGE (PAGE_CACHE_SIZE / sizeof(lnet_process_id_packed_t)) -#define SFW_MAX_NDESTS (LNET_MAX_IOV * SFW_ID_PER_PAGE) +/* XXX: trailing (PAGE_SIZE % sizeof(lnet_process_id_t)) bytes at the end of + * pages are not used */ +#define SFW_MAX_CONCUR LST_MAX_CONCUR +#define SFW_ID_PER_PAGE (PAGE_SIZE / sizeof(lnet_process_id_packed_t)) 
+#define SFW_MAX_NDESTS (LNET_MAX_IOV * SFW_ID_PER_PAGE) #define sfw_id_pages(n) (((n) + SFW_ID_PER_PAGE - 1) / SFW_ID_PER_PAGE) typedef struct sfw_test_unit { - struct list_head tsu_list; /* chain on lst_test_instance */ - lnet_process_id_t tsu_dest; /* id of dest node */ - int tsu_loop; /* loop count of the test */ + struct list_head tsu_list; /* chain on lst_test_instance */ + lnet_process_id_t tsu_dest; /* id of dest node */ + int tsu_loop; /* loop count of the test */ sfw_test_instance_t *tsu_instance; /* pointer to test instance */ - void *tsu_private; /* private data */ - swi_workitem_t tsu_worker; /* workitem of the test unit */ + void *tsu_private; /* private data */ + swi_workitem_t tsu_worker; /* workitem of the test unit */ } sfw_test_unit_t; typedef struct sfw_test_case { - struct list_head tsc_list; /* chain on fw_tests */ - srpc_service_t *tsc_srv_service; /* test service */ - sfw_test_client_ops_t *tsc_cli_ops; /* ops of test client */ + struct list_head tsc_list; /* chain on fw_tests */ + srpc_service_t *tsc_srv_service; /* test service */ + sfw_test_client_ops_t *tsc_cli_ops; /* ops of test client */ } sfw_test_case_t; srpc_client_rpc_t * sfw_create_rpc(lnet_process_id_t peer, int service, unsigned features, int nbulkiov, int bulklen, - void (*done) (srpc_client_rpc_t *), void *priv); + void (*done)(srpc_client_rpc_t *), void *priv); int sfw_create_test_rpc(sfw_test_unit_t *tsu, lnet_process_id_t peer, unsigned features, int nblk, int blklen, srpc_client_rpc_t **rpc); @@ -427,7 +427,7 @@ void sfw_free_pages(struct srpc_server_rpc *rpc); void sfw_add_bulk_page(srpc_bulk_t *bk, struct page *pg, int i); int sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len, int sink); -int sfw_make_session (srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply); +int sfw_make_session(srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply); srpc_client_rpc_t * srpc_create_client_rpc(lnet_process_id_t peer, int service, @@ -472,9 +472,9 @@ static inline void swi_init_workitem(swi_workitem_t *swi, void *data, swi_action_t action, struct cfs_wi_sched *sched) { - swi->swi_sched = sched; + swi->swi_sched = sched; swi->swi_action = action; - swi->swi_state = SWI_STATE_NEWBORN; + swi->swi_state = SWI_STATE_NEWBORN; cfs_wi_init(&swi->swi_workitem, data, swi_wi_action); } @@ -502,26 +502,23 @@ void sfw_shutdown(void); void srpc_shutdown(void); static inline void -srpc_destroy_client_rpc (srpc_client_rpc_t *rpc) +srpc_destroy_client_rpc(srpc_client_rpc_t *rpc) { - LASSERT(rpc != NULL); + LASSERT(rpc); LASSERT(!srpc_event_pending(rpc)); - LASSERT(atomic_read(&rpc->crpc_refcount) == 0); + LASSERT(!atomic_read(&rpc->crpc_refcount)); - if (rpc->crpc_fini == NULL) { + if (!rpc->crpc_fini) LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc)); - } else { - (*rpc->crpc_fini) (rpc); - } - - return; + else + (*rpc->crpc_fini)(rpc); } static inline void -srpc_init_client_rpc (srpc_client_rpc_t *rpc, lnet_process_id_t peer, - int service, int nbulkiov, int bulklen, - void (*rpc_done)(srpc_client_rpc_t *), - void (*rpc_fini)(srpc_client_rpc_t *), void *priv) +srpc_init_client_rpc(srpc_client_rpc_t *rpc, lnet_process_id_t peer, + int service, int nbulkiov, int bulklen, + void (*rpc_done)(srpc_client_rpc_t *), + void (*rpc_fini)(srpc_client_rpc_t *), void *priv) { LASSERT(nbulkiov <= LNET_MAX_IOV); @@ -534,30 +531,29 @@ srpc_init_client_rpc (srpc_client_rpc_t *rpc, lnet_process_id_t peer, spin_lock_init(&rpc->crpc_lock); atomic_set(&rpc->crpc_refcount, 1); /* 1 ref for caller */ - rpc->crpc_dest = peer; - 
rpc->crpc_priv = priv; - rpc->crpc_service = service; - rpc->crpc_bulk.bk_len = bulklen; + rpc->crpc_dest = peer; + rpc->crpc_priv = priv; + rpc->crpc_service = service; + rpc->crpc_bulk.bk_len = bulklen; rpc->crpc_bulk.bk_niov = nbulkiov; - rpc->crpc_done = rpc_done; - rpc->crpc_fini = rpc_fini; + rpc->crpc_done = rpc_done; + rpc->crpc_fini = rpc_fini; LNetInvalidateHandle(&rpc->crpc_reqstmdh); LNetInvalidateHandle(&rpc->crpc_replymdh); LNetInvalidateHandle(&rpc->crpc_bulk.bk_mdh); /* no event is expected at this point */ - rpc->crpc_bulkev.ev_fired = - rpc->crpc_reqstev.ev_fired = + rpc->crpc_bulkev.ev_fired = 1; + rpc->crpc_reqstev.ev_fired = 1; rpc->crpc_replyev.ev_fired = 1; - rpc->crpc_reqstmsg.msg_magic = SRPC_MSG_MAGIC; + rpc->crpc_reqstmsg.msg_magic = SRPC_MSG_MAGIC; rpc->crpc_reqstmsg.msg_version = SRPC_MSG_VERSION; - rpc->crpc_reqstmsg.msg_type = srpc_service2request(service); - return; + rpc->crpc_reqstmsg.msg_type = srpc_service2request(service); } static inline const char * -swi_state2str (int state) +swi_state2str(int state) { #define STATE2STR(x) case x: return #x switch (state) { @@ -602,11 +598,11 @@ srpc_wait_service_shutdown(srpc_service_t *sv) LASSERT(sv->sv_shuttingdown); - while (srpc_finish_service(sv) == 0) { + while (!srpc_finish_service(sv)) { i++; - CDEBUG (((i & -i) == i) ? D_WARNING : D_NET, - "Waiting for %s service to shutdown...\n", - sv->sv_name); + CDEBUG(((i & -i) == i) ? D_WARNING : D_NET, + "Waiting for %s service to shutdown...\n", + sv->sv_name); selftest_wait_events(); } } diff --git a/drivers/staging/lustre/lnet/selftest/timer.c b/drivers/staging/lustre/lnet/selftest/timer.c index b98c08a10..8be52526a 100644 --- a/drivers/staging/lustre/lnet/selftest/timer.c +++ b/drivers/staging/lustre/lnet/selftest/timer.c @@ -57,17 +57,17 @@ (STTIMER_NSLOTS - 1))]) static struct st_timer_data { - spinlock_t stt_lock; - unsigned long stt_prev_slot; /* start time of the slot processed + spinlock_t stt_lock; + unsigned long stt_prev_slot; /* start time of the slot processed * previously */ struct list_head stt_hash[STTIMER_NSLOTS]; - int stt_shuttingdown; + int stt_shuttingdown; wait_queue_head_t stt_waitq; - int stt_nthreads; + int stt_nthreads; } stt_data; void -stt_add_timer(stt_timer_t *timer) +stt_add_timer(struct stt_timer *timer) { struct list_head *pos; @@ -75,13 +75,14 @@ stt_add_timer(stt_timer_t *timer) LASSERT(stt_data.stt_nthreads > 0); LASSERT(!stt_data.stt_shuttingdown); - LASSERT(timer->stt_func != NULL); + LASSERT(timer->stt_func); LASSERT(list_empty(&timer->stt_list)); LASSERT(timer->stt_expires > ktime_get_real_seconds()); /* a simple insertion sort */ list_for_each_prev(pos, STTIMER_SLOT(timer->stt_expires)) { - stt_timer_t *old = list_entry(pos, stt_timer_t, stt_list); + struct stt_timer *old = list_entry(pos, struct stt_timer, + stt_list); if (timer->stt_expires >= old->stt_expires) break; @@ -101,7 +102,7 @@ stt_add_timer(stt_timer_t *timer) * another CPU. 
*/ int -stt_del_timer(stt_timer_t *timer) +stt_del_timer(struct stt_timer *timer) { int ret = 0; @@ -124,10 +125,10 @@ static int stt_expire_list(struct list_head *slot, time64_t now) { int expired = 0; - stt_timer_t *timer; + struct stt_timer *timer; while (!list_empty(slot)) { - timer = list_entry(slot->next, stt_timer_t, stt_list); + timer = list_entry(slot->next, struct stt_timer, stt_list); if (timer->stt_expires > now) break; @@ -218,7 +219,7 @@ stt_startup(void) stt_data.stt_nthreads = 0; init_waitqueue_head(&stt_data.stt_waitq); rc = stt_start_timer_thread(); - if (rc != 0) + if (rc) CERROR("Can't spawn timer thread: %d\n", rc); return rc; @@ -237,7 +238,7 @@ stt_shutdown(void) stt_data.stt_shuttingdown = 1; wake_up(&stt_data.stt_waitq); - lst_wait_until(stt_data.stt_nthreads == 0, stt_data.stt_lock, + lst_wait_until(!stt_data.stt_nthreads, stt_data.stt_lock, "waiting for %d threads to terminate\n", stt_data.stt_nthreads); diff --git a/drivers/staging/lustre/lnet/selftest/timer.h b/drivers/staging/lustre/lnet/selftest/timer.h index 03e2ee294..f1fbebd8a 100644 --- a/drivers/staging/lustre/lnet/selftest/timer.h +++ b/drivers/staging/lustre/lnet/selftest/timer.h @@ -38,15 +38,15 @@ #ifndef __SELFTEST_TIMER_H__ #define __SELFTEST_TIMER_H__ -typedef struct { +struct stt_timer { struct list_head stt_list; - time64_t stt_expires; - void (*stt_func) (void *); - void *stt_data; -} stt_timer_t; + time64_t stt_expires; + void (*stt_func)(void *); + void *stt_data; +}; -void stt_add_timer(stt_timer_t *timer); -int stt_del_timer(stt_timer_t *timer); +void stt_add_timer(struct stt_timer *timer); +int stt_del_timer(struct stt_timer *timer); int stt_startup(void); void stt_shutdown(void);
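
The timer.c and timer.h hunks above convert the stt_timer_t typedef to a plain struct stt_timer without changing the underlying design: a hashed timer wheel in which each timer is filed into one of STTIMER_NSLOTS buckets keyed by its expiry second, and each bucket is kept sorted by a simple insertion sort, so the expiry pass only pops due timers from the head of one slot. Below is a minimal standalone sketch of that scheme, assuming nothing from the kernel tree: the names (sketch_add_timer, sketch_expire_slot, NSLOTS = 128), the singly linked list, and the absence of locking and of the dedicated timer thread are all simplifications for illustration, not the driver's actual interface.

    /* timer_wheel_sketch.c -- illustration only; all names hypothetical */
    #include <stdio.h>
    #include <time.h>

    #define NSLOTS 128                      /* power of two, like STTIMER_NSLOTS */

    struct timer {
            struct timer *next;             /* kernel code uses struct list_head */
            time_t expires;                 /* absolute expiry, in seconds */
            void (*func)(void *);           /* callback, cf. stt_func */
            void *data;                     /* callback argument, cf. stt_data */
    };

    static struct timer *slots[NSLOTS];     /* one bucket per expiry second mod NSLOTS */

    static void sketch_add_timer(struct timer *t)
    {
            struct timer **p = &slots[t->expires & (NSLOTS - 1)];

            /* insertion sort: stop at the first timer expiring later
             * (the kernel version scans from the tail instead, since
             * most new timers expire after those already queued) */
            while (*p && (*p)->expires <= t->expires)
                    p = &(*p)->next;
            t->next = *p;
            *p = t;
    }

    static int sketch_expire_slot(int slot, time_t now)
    {
            int expired = 0;
            struct timer *t;

            /* the slot is sorted, so stop at the first still-pending timer */
            while ((t = slots[slot]) && t->expires <= now) {
                    slots[slot] = t->next;  /* detach before the callback runs */
                    t->func(t->data);
                    expired++;
            }
            return expired;
    }

    static void ping(void *data)
    {
            printf("expired: %s\n", (char *)data);
    }

    int main(void)
    {
            struct timer a = { NULL, time(NULL) + 1, ping, "a" };

            sketch_add_timer(&a);
            /* a real driver sleeps and sweeps one slot per elapsed second */
            return !sketch_expire_slot(a.expires & (NSLOTS - 1), a.expires);
    }

With each slot kept sorted, arming a timer costs at most one scan of a single bucket and the expiry pass touches only timers that are actually due, which is cheap enough that the selftest code can afford one such timer per client RPC (see srpc_add_client_rpc_timer in the rpc.c hunks above).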