summaryrefslogtreecommitdiff
path: root/fs/nfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/callback.c6
-rw-r--r--fs/nfs/callback_proc.c38
-rw-r--r--fs/nfs/callback_xdr.c2
-rw-r--r--fs/nfs/client.c42
-rw-r--r--fs/nfs/dir.c5
-rw-r--r--fs/nfs/file.c15
-rw-r--r--fs/nfs/filelayout/filelayout.c1
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c478
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.h33
-rw-r--r--fs/nfs/inode.c19
-rw-r--r--fs/nfs/internal.h23
-rw-r--r--fs/nfs/nfs42.h9
-rw-r--r--fs/nfs/nfs42proc.c106
-rw-r--r--fs/nfs/nfs42xdr.c106
-rw-r--r--fs/nfs/nfs4_fs.h1
-rw-r--r--fs/nfs/nfs4client.c1
-rw-r--r--fs/nfs/nfs4file.c4
-rw-r--r--fs/nfs/nfs4getroot.c7
-rw-r--r--fs/nfs/nfs4idmap.c7
-rw-r--r--fs/nfs/nfs4proc.c268
-rw-r--r--fs/nfs/nfs4state.c31
-rw-r--r--fs/nfs/nfs4xdr.c15
-rw-r--r--fs/nfs/pagelist.c12
-rw-r--r--fs/nfs/pnfs.c162
-rw-r--r--fs/nfs/pnfs.h13
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/nfs/symlink.c19
-rw-r--r--fs/nfs/write.c26
28 files changed, 1158 insertions, 293 deletions
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 8d129bb73..682529c00 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -458,7 +458,7 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
* pg_authenticate method for nfsv4 callback threads.
*
* The authflavor has been negotiated, so an incorrect flavor is a server
- * bug. Drop packets with incorrect authflavor.
+ * bug. Deny packets with incorrect authflavor.
*
* All other checking done after NFS decoding where the nfs_client can be
* found in nfs4_callback_compound
@@ -468,12 +468,12 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
switch (rqstp->rq_authop->flavour) {
case RPC_AUTH_NULL:
if (rqstp->rq_proc != CB_NULL)
- return SVC_DROP;
+ return SVC_DENIED;
break;
case RPC_AUTH_GSS:
/* No RPC_AUTH_GSS support yet in NFSv4.1 */
if (svc_is_backchannel(rqstp))
- return SVC_DROP;
+ return SVC_DENIED;
}
return SVC_OK;
}
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 197806fb8..29e3c1b01 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -327,10 +327,8 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
dprintk("%s slot table seqid: %u\n", __func__, slot->seq_nr);
/* Normal */
- if (likely(args->csa_sequenceid == slot->seq_nr + 1)) {
- slot->seq_nr++;
+ if (likely(args->csa_sequenceid == slot->seq_nr + 1))
goto out_ok;
- }
/* Replay */
if (args->csa_sequenceid == slot->seq_nr) {
@@ -418,6 +416,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
struct cb_process_state *cps)
{
struct nfs4_slot_table *tbl;
+ struct nfs4_slot *slot;
struct nfs_client *clp;
int i;
__be32 status = htonl(NFS4ERR_BADSESSION);
@@ -429,25 +428,32 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
if (!(clp->cl_session->flags & SESSION4_BACK_CHAN))
goto out;
+
tbl = &clp->cl_session->bc_slot_table;
+ slot = tbl->slots + args->csa_slotid;
spin_lock(&tbl->slot_tbl_lock);
/* state manager is resetting the session */
if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
- spin_unlock(&tbl->slot_tbl_lock);
status = htonl(NFS4ERR_DELAY);
/* Return NFS4ERR_BADSESSION if we're draining the session
* in order to reset it.
*/
if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
status = htonl(NFS4ERR_BADSESSION);
- goto out;
+ goto out_unlock;
}
- status = validate_seqid(&clp->cl_session->bc_slot_table, args);
- spin_unlock(&tbl->slot_tbl_lock);
+ memcpy(&res->csr_sessionid, &args->csa_sessionid,
+ sizeof(res->csr_sessionid));
+ res->csr_sequenceid = args->csa_sequenceid;
+ res->csr_slotid = args->csa_slotid;
+ res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+ res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+
+ status = validate_seqid(tbl, args);
if (status)
- goto out;
+ goto out_unlock;
cps->slotid = args->csa_slotid;
@@ -458,15 +464,17 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
*/
if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) {
status = htonl(NFS4ERR_DELAY);
- goto out;
+ goto out_unlock;
}
- memcpy(&res->csr_sessionid, &args->csa_sessionid,
- sizeof(res->csr_sessionid));
- res->csr_sequenceid = args->csa_sequenceid;
- res->csr_slotid = args->csa_slotid;
- res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
- res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+ /*
+ * RFC5661 20.9.3
+ * If CB_SEQUENCE returns an error, then the state of the slot
+ * (sequence ID, cached reply) MUST NOT change.
+ */
+ slot->seq_nr++;
+out_unlock:
+ spin_unlock(&tbl->slot_tbl_lock);
out:
cps->clp = clp; /* put in nfs4_callback_compound */
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 19ca95cdf..6b1697a01 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -909,7 +909,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
status = decode_compound_hdr_arg(&xdr_in, &hdr_arg);
- if (status == __constant_htonl(NFS4ERR_RESOURCE))
+ if (status == htonl(NFS4ERR_RESOURCE))
return rpc_garbage_args;
if (hdr_arg.minorversion == 0) {
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 892aefff3..4a90c9bb3 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -775,7 +775,7 @@ static int nfs_init_server(struct nfs_server *server,
server->options = data->options;
server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP|
- NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME|NFS_CAP_CHANGE_ATTR;
+ NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME;
if (data->rsize)
server->rsize = nfs_block_size(data->rsize, NULL);
@@ -825,7 +825,6 @@ error:
* Load up the server record from information gained in an fsinfo record
*/
static void nfs_server_set_fsinfo(struct nfs_server *server,
- struct nfs_fh *mntfh,
struct nfs_fsinfo *fsinfo)
{
unsigned long max_rpc_payload;
@@ -901,7 +900,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
if (error < 0)
goto out_error;
- nfs_server_set_fsinfo(server, mntfh, &fsinfo);
+ nfs_server_set_fsinfo(server, &fsinfo);
/* Get some general file system info */
if (server->namelen == 0) {
@@ -1193,8 +1192,6 @@ void nfs_clients_init(struct net *net)
}
#ifdef CONFIG_PROC_FS
-static struct proc_dir_entry *proc_fs_nfs;
-
static int nfs_server_list_open(struct inode *inode, struct file *file);
static void *nfs_server_list_start(struct seq_file *p, loff_t *pos);
static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos);
@@ -1364,27 +1361,29 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
{
struct nfs_server *server;
struct nfs_client *clp;
- char dev[8], fsid[17];
+ char dev[13]; // 8 for 2^24, 1 for ':', 3 for 2^8, 1 for '\0'
+ char fsid[34]; // 2 * 16 for %llx, 1 for ':', 1 for '\0'
struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
/* display header on line 1 */
if (v == &nn->nfs_volume_list) {
- seq_puts(m, "NV SERVER PORT DEV FSID FSC\n");
+ seq_puts(m, "NV SERVER PORT DEV FSID"
+ " FSC\n");
return 0;
}
/* display one transport per line on subsequent lines */
server = list_entry(v, struct nfs_server, master_link);
clp = server->nfs_client;
- snprintf(dev, 8, "%u:%u",
+ snprintf(dev, sizeof(dev), "%u:%u",
MAJOR(server->s_dev), MINOR(server->s_dev));
- snprintf(fsid, 17, "%llx:%llx",
+ snprintf(fsid, sizeof(fsid), "%llx:%llx",
(unsigned long long) server->fsid.major,
(unsigned long long) server->fsid.minor);
rcu_read_lock();
- seq_printf(m, "v%u %s %s %-7s %-17s %s\n",
+ seq_printf(m, "v%u %s %s %-12s %-33s %s\n",
clp->rpc_ops->version,
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
@@ -1434,27 +1433,20 @@ void nfs_fs_proc_net_exit(struct net *net)
*/
int __init nfs_fs_proc_init(void)
{
- struct proc_dir_entry *p;
-
- proc_fs_nfs = proc_mkdir("fs/nfsfs", NULL);
- if (!proc_fs_nfs)
+ if (!proc_mkdir("fs/nfsfs", NULL))
goto error_0;
/* a file of servers with which we're dealing */
- p = proc_symlink("servers", proc_fs_nfs, "../../net/nfsfs/servers");
- if (!p)
+ if (!proc_symlink("fs/nfsfs/servers", NULL, "../../net/nfsfs/servers"))
goto error_1;
/* a file of volumes that we have mounted */
- p = proc_symlink("volumes", proc_fs_nfs, "../../net/nfsfs/volumes");
- if (!p)
- goto error_2;
- return 0;
+ if (!proc_symlink("fs/nfsfs/volumes", NULL, "../../net/nfsfs/volumes"))
+ goto error_1;
-error_2:
- remove_proc_entry("servers", proc_fs_nfs);
+ return 0;
error_1:
- remove_proc_entry("fs/nfsfs", NULL);
+ remove_proc_subtree("fs/nfsfs", NULL);
error_0:
return -ENOMEM;
}
@@ -1464,9 +1456,7 @@ error_0:
*/
void nfs_fs_proc_exit(void)
{
- remove_proc_entry("volumes", proc_fs_nfs);
- remove_proc_entry("servers", proc_fs_nfs);
- remove_proc_entry("fs/nfsfs", NULL);
+ remove_proc_subtree("fs/nfsfs", NULL);
}
#endif /* CONFIG_PROC_FS */
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b2c8b31b2..547308a5e 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1470,9 +1470,6 @@ static int nfs_finish_open(struct nfs_open_context *ctx,
{
int err;
- if ((open_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
- *opened |= FILE_CREATED;
-
err = finish_open(file, dentry, do_open, opened);
if (err)
goto out;
@@ -1771,7 +1768,7 @@ EXPORT_SYMBOL_GPL(nfs_mkdir);
static void nfs_dentry_handle_enoent(struct dentry *dentry)
{
- if (d_really_is_positive(dentry) && !d_unhashed(dentry))
+ if (simple_positive(dentry))
d_delete(dentry);
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 8b8d83a52..cc4fa1ed6 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -555,31 +555,22 @@ static int nfs_launder_page(struct page *page)
return nfs_wb_page(inode, page);
}
-#ifdef CONFIG_NFS_SWAP
static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
sector_t *span)
{
- int ret;
struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
*span = sis->pages;
- rcu_read_lock();
- ret = xs_swapper(rcu_dereference(clnt->cl_xprt), 1);
- rcu_read_unlock();
-
- return ret;
+ return rpc_clnt_swap_activate(clnt);
}
static void nfs_swap_deactivate(struct file *file)
{
struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
- rcu_read_lock();
- xs_swapper(rcu_dereference(clnt->cl_xprt), 0);
- rcu_read_unlock();
+ rpc_clnt_swap_deactivate(clnt);
}
-#endif
const struct address_space_operations nfs_file_aops = {
.readpage = nfs_readpage,
@@ -596,10 +587,8 @@ const struct address_space_operations nfs_file_aops = {
.launder_page = nfs_launder_page,
.is_dirty_writeback = nfs_check_dirty_writeback,
.error_remove_page = generic_error_remove_page,
-#ifdef CONFIG_NFS_SWAP
.swap_activate = nfs_swap_activate,
.swap_deactivate = nfs_swap_deactivate,
-#endif
};
/*
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index a46bf6de9..b34f2e228 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -32,6 +32,7 @@
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
+#include <linux/backing-dev.h>
#include <linux/sunrpc/metrics.h>
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 6f5f0f425..b3289d701 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -20,6 +20,7 @@
#include "../nfs4trace.h"
#include "../iostat.h"
#include "../nfs.h"
+#include "../nfs42.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
@@ -182,17 +183,14 @@ static void _ff_layout_free_lseg(struct nfs4_ff_layout_segment *fls)
static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls)
{
- struct nfs4_ff_layout_mirror *tmp;
int i, j;
for (i = 0; i < fls->mirror_array_cnt - 1; i++) {
for (j = i + 1; j < fls->mirror_array_cnt; j++)
if (fls->mirror_array[i]->efficiency <
- fls->mirror_array[j]->efficiency) {
- tmp = fls->mirror_array[i];
- fls->mirror_array[i] = fls->mirror_array[j];
- fls->mirror_array[j] = tmp;
- }
+ fls->mirror_array[j]->efficiency)
+ swap(fls->mirror_array[i],
+ fls->mirror_array[j]);
}
}
@@ -274,6 +272,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
spin_lock_init(&fls->mirror_array[i]->lock);
fls->mirror_array[i]->ds_count = ds_count;
+ fls->mirror_array[i]->lseg = &fls->generic_hdr;
/* deviceid */
rc = decode_deviceid(&stream, &devid);
@@ -344,6 +343,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
fls->mirror_array[i]->gid);
}
+ p = xdr_inline_decode(&stream, 4);
+ if (p)
+ fls->flags = be32_to_cpup(p);
+
ff_layout_sort_mirrors(fls);
rc = ff_layout_check_layout(lgr);
if (rc)
@@ -415,6 +418,146 @@ ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls)
return 1;
}
+static void
+nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer)
+{
+ /* first IO request? */
+ if (atomic_inc_return(&timer->n_ops) == 1) {
+ timer->start_time = ktime_get();
+ }
+}
+
+static ktime_t
+nfs4_ff_end_busy_timer(struct nfs4_ff_busy_timer *timer)
+{
+ ktime_t start, now;
+
+ if (atomic_dec_return(&timer->n_ops) < 0)
+ WARN_ON_ONCE(1);
+
+ now = ktime_get();
+ start = timer->start_time;
+ timer->start_time = now;
+ return ktime_sub(now, start);
+}
+
+static ktime_t
+nfs4_ff_layout_calc_completion_time(struct rpc_task *task)
+{
+ return ktime_sub(ktime_get(), task->tk_start);
+}
+
+static bool
+nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
+ struct nfs4_ff_layoutstat *layoutstat)
+{
+ static const ktime_t notime = {0};
+ ktime_t now = ktime_get();
+
+ nfs4_ff_start_busy_timer(&layoutstat->busy_timer);
+ if (ktime_equal(mirror->start_time, notime))
+ mirror->start_time = now;
+ if (ktime_equal(mirror->last_report_time, notime))
+ mirror->last_report_time = now;
+ if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >=
+ FF_LAYOUTSTATS_REPORT_INTERVAL) {
+ mirror->last_report_time = now;
+ return true;
+ }
+
+ return false;
+}
+
+static void
+nfs4_ff_layout_stat_io_update_requested(struct nfs4_ff_layoutstat *layoutstat,
+ __u64 requested)
+{
+ struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat;
+
+ iostat->ops_requested++;
+ iostat->bytes_requested += requested;
+}
+
+static void
+nfs4_ff_layout_stat_io_update_completed(struct nfs4_ff_layoutstat *layoutstat,
+ __u64 requested,
+ __u64 completed,
+ ktime_t time_completed)
+{
+ struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat;
+ ktime_t timer;
+
+ iostat->ops_completed++;
+ iostat->bytes_completed += completed;
+ iostat->bytes_not_delivered += requested - completed;
+
+ timer = nfs4_ff_end_busy_timer(&layoutstat->busy_timer);
+ iostat->total_busy_time =
+ ktime_add(iostat->total_busy_time, timer);
+ iostat->aggregate_completion_time =
+ ktime_add(iostat->aggregate_completion_time, time_completed);
+}
+
+static void
+nfs4_ff_layout_stat_io_start_read(struct nfs4_ff_layout_mirror *mirror,
+ __u64 requested)
+{
+ bool report;
+
+ spin_lock(&mirror->lock);
+ report = nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat);
+ nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested);
+ spin_unlock(&mirror->lock);
+
+ if (report)
+ pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode);
+}
+
+static void
+nfs4_ff_layout_stat_io_end_read(struct rpc_task *task,
+ struct nfs4_ff_layout_mirror *mirror,
+ __u64 requested,
+ __u64 completed)
+{
+ spin_lock(&mirror->lock);
+ nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat,
+ requested, completed,
+ nfs4_ff_layout_calc_completion_time(task));
+ spin_unlock(&mirror->lock);
+}
+
+static void
+nfs4_ff_layout_stat_io_start_write(struct nfs4_ff_layout_mirror *mirror,
+ __u64 requested)
+{
+ bool report;
+
+ spin_lock(&mirror->lock);
+ report = nfs4_ff_layoutstat_start_io(mirror , &mirror->write_stat);
+ nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested);
+ spin_unlock(&mirror->lock);
+
+ if (report)
+ pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode);
+}
+
+static void
+nfs4_ff_layout_stat_io_end_write(struct rpc_task *task,
+ struct nfs4_ff_layout_mirror *mirror,
+ __u64 requested,
+ __u64 completed,
+ enum nfs3_stable_how committed)
+{
+ if (committed == NFS_UNSTABLE)
+ requested = completed = 0;
+
+ spin_lock(&mirror->lock);
+ nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat,
+ requested, completed,
+ nfs4_ff_layout_calc_completion_time(task));
+ spin_unlock(&mirror->lock);
+}
+
static int
ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo,
@@ -879,6 +1022,12 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
return 0;
}
+static bool
+ff_layout_need_layoutcommit(struct pnfs_layout_segment *lseg)
+{
+ return !(FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_LAYOUTCOMMIT);
+}
+
/*
* We reference the rpc_cred of the first WRITE that triggers the need for
* a LAYOUTCOMMIT, and use it to send the layoutcommit compound.
@@ -891,6 +1040,9 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
static void
ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr)
{
+ if (!ff_layout_need_layoutcommit(hdr->lseg))
+ return;
+
pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
hdr->mds_offset + hdr->res.count);
dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
@@ -909,6 +1061,10 @@ ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx)
static int ff_layout_read_prepare_common(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
+ nfs4_ff_layout_stat_io_start_read(
+ FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+ hdr->args.count);
+
if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
rpc_exit(task, -EIO);
return -EIO;
@@ -962,15 +1118,15 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
{
struct nfs_pgio_header *hdr = data;
- if (ff_layout_read_prepare_common(task, hdr))
- return;
-
if (ff_layout_setup_sequence(hdr->ds_clp,
&hdr->args.seq_args,
&hdr->res.seq_res,
task))
return;
+ if (ff_layout_read_prepare_common(task, hdr))
+ return;
+
if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
hdr->args.lock_context, FMODE_READ) == -EIO)
rpc_exit(task, -EIO); /* lost lock, terminate I/O */
@@ -982,6 +1138,10 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data)
dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
+ nfs4_ff_layout_stat_io_end_read(task,
+ FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+ hdr->args.count, hdr->res.count);
+
if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
task->tk_status == 0) {
nfs4_sequence_done(task, &hdr->res.seq_res);
@@ -1074,7 +1234,8 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
return -EAGAIN;
}
- if (data->verf.committed == NFS_UNSTABLE)
+ if (data->verf.committed == NFS_UNSTABLE
+ && ff_layout_need_layoutcommit(data->lseg))
pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb);
return 0;
@@ -1083,6 +1244,10 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
static int ff_layout_write_prepare_common(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
+ nfs4_ff_layout_stat_io_start_write(
+ FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+ hdr->args.count);
+
if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
rpc_exit(task, -EIO);
return -EIO;
@@ -1116,15 +1281,15 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data)
{
struct nfs_pgio_header *hdr = data;
- if (ff_layout_write_prepare_common(task, hdr))
- return;
-
if (ff_layout_setup_sequence(hdr->ds_clp,
&hdr->args.seq_args,
&hdr->res.seq_res,
task))
return;
+ if (ff_layout_write_prepare_common(task, hdr))
+ return;
+
if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
hdr->args.lock_context, FMODE_WRITE) == -EIO)
rpc_exit(task, -EIO); /* lost lock, terminate I/O */
@@ -1134,6 +1299,11 @@ static void ff_layout_write_call_done(struct rpc_task *task, void *data)
{
struct nfs_pgio_header *hdr = data;
+ nfs4_ff_layout_stat_io_end_write(task,
+ FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+ hdr->args.count, hdr->res.count,
+ hdr->res.verf->committed);
+
if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
task->tk_status == 0) {
nfs4_sequence_done(task, &hdr->res.seq_res);
@@ -1152,8 +1322,17 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data)
&NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]);
}
+static void ff_layout_commit_prepare_common(struct rpc_task *task,
+ struct nfs_commit_data *cdata)
+{
+ nfs4_ff_layout_stat_io_start_write(
+ FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
+ 0);
+}
+
static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data)
{
+ ff_layout_commit_prepare_common(task, data);
rpc_call_start(task);
}
@@ -1161,10 +1340,30 @@ static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data)
{
struct nfs_commit_data *wdata = data;
- ff_layout_setup_sequence(wdata->ds_clp,
+ if (ff_layout_setup_sequence(wdata->ds_clp,
&wdata->args.seq_args,
&wdata->res.seq_res,
- task);
+ task))
+ return;
+ ff_layout_commit_prepare_common(task, data);
+}
+
+static void ff_layout_commit_done(struct rpc_task *task, void *data)
+{
+ struct nfs_commit_data *cdata = data;
+ struct nfs_page *req;
+ __u64 count = 0;
+
+ if (task->tk_status == 0) {
+ list_for_each_entry(req, &cdata->pages, wb_list)
+ count += req->wb_bytes;
+ }
+
+ nfs4_ff_layout_stat_io_end_write(task,
+ FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
+ count, count, NFS_FILE_SYNC);
+
+ pnfs_generic_write_commit_done(task, data);
}
static void ff_layout_commit_count_stats(struct rpc_task *task, void *data)
@@ -1205,14 +1404,14 @@ static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {
static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = {
.rpc_call_prepare = ff_layout_commit_prepare_v3,
- .rpc_call_done = pnfs_generic_write_commit_done,
+ .rpc_call_done = ff_layout_commit_done,
.rpc_count_stats = ff_layout_commit_count_stats,
.rpc_release = pnfs_generic_commit_release,
};
static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = {
.rpc_call_prepare = ff_layout_commit_prepare_v4,
- .rpc_call_done = pnfs_generic_write_commit_done,
+ .rpc_call_done = ff_layout_commit_done,
.rpc_count_stats = ff_layout_commit_count_stats,
.rpc_release = pnfs_generic_commit_release,
};
@@ -1256,7 +1455,6 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
if (fh)
hdr->args.fh = fh;
-
/*
* Note that if we ever decide to split across DSes,
* then we may need to handle dense-like offsets.
@@ -1385,6 +1583,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
if (fh)
data->args.fh = fh;
+
return nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_commit_call_ops_v3 :
&ff_layout_commit_call_ops_v4,
@@ -1488,6 +1687,247 @@ out:
dprintk("%s: Return\n", __func__);
}
+static int
+ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen)
+{
+ const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
+
+ return snprintf(buf, buflen, "%pI4", &sin->sin_addr);
+}
+
+static size_t
+ff_layout_ntop6_noscopeid(const struct sockaddr *sap, char *buf,
+ const int buflen)
+{
+ const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
+ const struct in6_addr *addr = &sin6->sin6_addr;
+
+ /*
+ * RFC 4291, Section 2.2.2
+ *
+ * Shorthanded ANY address
+ */
+ if (ipv6_addr_any(addr))
+ return snprintf(buf, buflen, "::");
+
+ /*
+ * RFC 4291, Section 2.2.2
+ *
+ * Shorthanded loopback address
+ */
+ if (ipv6_addr_loopback(addr))
+ return snprintf(buf, buflen, "::1");
+
+ /*
+ * RFC 4291, Section 2.2.3
+ *
+ * Special presentation address format for mapped v4
+ * addresses.
+ */
+ if (ipv6_addr_v4mapped(addr))
+ return snprintf(buf, buflen, "::ffff:%pI4",
+ &addr->s6_addr32[3]);
+
+ /*
+ * RFC 4291, Section 2.2.1
+ */
+ return snprintf(buf, buflen, "%pI6c", addr);
+}
+
+/* Derived from rpc_sockaddr2uaddr */
+static void
+ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da)
+{
+ struct sockaddr *sap = (struct sockaddr *)&da->da_addr;
+ char portbuf[RPCBIND_MAXUADDRPLEN];
+ char addrbuf[RPCBIND_MAXUADDRLEN];
+ char *netid;
+ unsigned short port;
+ int len, netid_len;
+ __be32 *p;
+
+ switch (sap->sa_family) {
+ case AF_INET:
+ if (ff_layout_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0)
+ return;
+ port = ntohs(((struct sockaddr_in *)sap)->sin_port);
+ netid = "tcp";
+ netid_len = 3;
+ break;
+ case AF_INET6:
+ if (ff_layout_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0)
+ return;
+ port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
+ netid = "tcp6";
+ netid_len = 4;
+ break;
+ default:
+ /* we only support tcp and tcp6 */
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ snprintf(portbuf, sizeof(portbuf), ".%u.%u", port >> 8, port & 0xff);
+ len = strlcat(addrbuf, portbuf, sizeof(addrbuf));
+
+ p = xdr_reserve_space(xdr, 4 + netid_len);
+ xdr_encode_opaque(p, netid, netid_len);
+
+ p = xdr_reserve_space(xdr, 4 + len);
+ xdr_encode_opaque(p, addrbuf, len);
+}
+
+static void
+ff_layout_encode_nfstime(struct xdr_stream *xdr,
+ ktime_t t)
+{
+ struct timespec64 ts;
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 12);
+ ts = ktime_to_timespec64(t);
+ p = xdr_encode_hyper(p, ts.tv_sec);
+ *p++ = cpu_to_be32(ts.tv_nsec);
+}
+
+static void
+ff_layout_encode_io_latency(struct xdr_stream *xdr,
+ struct nfs4_ff_io_stat *stat)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 5 * 8);
+ p = xdr_encode_hyper(p, stat->ops_requested);
+ p = xdr_encode_hyper(p, stat->bytes_requested);
+ p = xdr_encode_hyper(p, stat->ops_completed);
+ p = xdr_encode_hyper(p, stat->bytes_completed);
+ p = xdr_encode_hyper(p, stat->bytes_not_delivered);
+ ff_layout_encode_nfstime(xdr, stat->total_busy_time);
+ ff_layout_encode_nfstime(xdr, stat->aggregate_completion_time);
+}
+
+static void
+ff_layout_encode_layoutstats(struct xdr_stream *xdr,
+ struct nfs42_layoutstat_args *args,
+ struct nfs42_layoutstat_devinfo *devinfo)
+{
+ struct nfs4_ff_layout_mirror *mirror = devinfo->layout_private;
+ struct nfs4_pnfs_ds_addr *da;
+ struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds;
+ struct nfs_fh *fh = &mirror->fh_versions[0];
+ __be32 *p, *start;
+
+ da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node);
+ dprintk("%s: DS %s: encoding address %s\n",
+ __func__, ds->ds_remotestr, da->da_remotestr);
+ /* layoutupdate length */
+ start = xdr_reserve_space(xdr, 4);
+ /* netaddr4 */
+ ff_layout_encode_netaddr(xdr, da);
+ /* nfs_fh4 */
+ p = xdr_reserve_space(xdr, 4 + fh->size);
+ xdr_encode_opaque(p, fh->data, fh->size);
+ /* ff_io_latency4 read */
+ spin_lock(&mirror->lock);
+ ff_layout_encode_io_latency(xdr, &mirror->read_stat.io_stat);
+ /* ff_io_latency4 write */
+ ff_layout_encode_io_latency(xdr, &mirror->write_stat.io_stat);
+ spin_unlock(&mirror->lock);
+ /* nfstime4 */
+ ff_layout_encode_nfstime(xdr, ktime_sub(ktime_get(), mirror->start_time));
+ /* bool */
+ p = xdr_reserve_space(xdr, 4);
+ *p = cpu_to_be32(false);
+
+ *start = cpu_to_be32((xdr->p - start - 1) * 4);
+}
+
+static bool
+ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args,
+ struct pnfs_layout_segment *pls,
+ int *dev_count, int dev_limit)
+{
+ struct nfs4_ff_layout_mirror *mirror;
+ struct nfs4_deviceid_node *dev;
+ struct nfs42_layoutstat_devinfo *devinfo;
+ int i;
+
+ for (i = 0; i < FF_LAYOUT_MIRROR_COUNT(pls); i++) {
+ if (*dev_count >= dev_limit)
+ break;
+ mirror = FF_LAYOUT_COMP(pls, i);
+ if (!mirror || !mirror->mirror_ds)
+ continue;
+ dev = FF_LAYOUT_DEVID_NODE(pls, i);
+ devinfo = &args->devinfo[*dev_count];
+ memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE);
+ devinfo->offset = pls->pls_range.offset;
+ devinfo->length = pls->pls_range.length;
+ /* well, we don't really know if IO is continuous or not! */
+ devinfo->read_count = mirror->read_stat.io_stat.bytes_completed;
+ devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed;
+ devinfo->write_count = mirror->write_stat.io_stat.bytes_completed;
+ devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed;
+ devinfo->layout_type = LAYOUT_FLEX_FILES;
+ devinfo->layoutstats_encode = ff_layout_encode_layoutstats;
+ devinfo->layout_private = mirror;
+ /* lseg refcount put in cleanup_layoutstats */
+ pnfs_get_lseg(pls);
+
+ ++(*dev_count);
+ }
+
+ return *dev_count < dev_limit;
+}
+
+static int
+ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)
+{
+ struct pnfs_layout_segment *pls;
+ int dev_count = 0;
+
+ spin_lock(&args->inode->i_lock);
+ list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) {
+ dev_count += FF_LAYOUT_MIRROR_COUNT(pls);
+ }
+ spin_unlock(&args->inode->i_lock);
+ /* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */
+ if (dev_count > PNFS_LAYOUTSTATS_MAXDEV) {
+ dprintk("%s: truncating devinfo to limit (%d:%d)\n",
+ __func__, dev_count, PNFS_LAYOUTSTATS_MAXDEV);
+ dev_count = PNFS_LAYOUTSTATS_MAXDEV;
+ }
+ args->devinfo = kmalloc(dev_count * sizeof(*args->devinfo), GFP_KERNEL);
+ if (!args->devinfo)
+ return -ENOMEM;
+
+ dev_count = 0;
+ spin_lock(&args->inode->i_lock);
+ list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) {
+ if (!ff_layout_mirror_prepare_stats(args, pls, &dev_count,
+ PNFS_LAYOUTSTATS_MAXDEV)) {
+ break;
+ }
+ }
+ spin_unlock(&args->inode->i_lock);
+ args->num_dev = dev_count;
+
+ return 0;
+}
+
+static void
+ff_layout_cleanup_layoutstats(struct nfs42_layoutstat_data *data)
+{
+ struct nfs4_ff_layout_mirror *mirror;
+ int i;
+
+ for (i = 0; i < data->args.num_dev; i++) {
+ mirror = data->args.devinfo[i].layout_private;
+ data->args.devinfo[i].layout_private = NULL;
+ pnfs_put_lseg(mirror->lseg);
+ }
+}
+
static struct pnfs_layoutdriver_type flexfilelayout_type = {
.id = LAYOUT_FLEX_FILES,
.name = "LAYOUT_FLEX_FILES",
@@ -1510,6 +1950,8 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {
.alloc_deviceid_node = ff_layout_alloc_deviceid_node,
.encode_layoutreturn = ff_layout_encode_layoutreturn,
.sync = pnfs_nfs_generic_sync,
+ .prepare_layoutstats = ff_layout_prepare_layoutstats,
+ .cleanup_layoutstats = ff_layout_cleanup_layoutstats,
};
static int __init nfs4flexfilelayout_init(void)
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index 070f20445..f92f9a0a8 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -9,12 +9,17 @@
#ifndef FS_NFS_NFS4FLEXFILELAYOUT_H
#define FS_NFS_NFS4FLEXFILELAYOUT_H
+#define FF_FLAGS_NO_LAYOUTCOMMIT 1
+
#include "../pnfs.h"
/* XXX: Let's filter out insanely large mirror count for now to avoid oom
* due to network error etc. */
#define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096
+/* LAYOUTSTATS report interval in ms */
+#define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L)
+
struct nfs4_ff_ds_version {
u32 version;
u32 minor_version;
@@ -41,24 +46,48 @@ struct nfs4_ff_layout_ds_err {
struct nfs4_deviceid deviceid;
};
+struct nfs4_ff_io_stat {
+ __u64 ops_requested;
+ __u64 bytes_requested;
+ __u64 ops_completed;
+ __u64 bytes_completed;
+ __u64 bytes_not_delivered;
+ ktime_t total_busy_time;
+ ktime_t aggregate_completion_time;
+};
+
+struct nfs4_ff_busy_timer {
+ ktime_t start_time;
+ atomic_t n_ops;
+};
+
+struct nfs4_ff_layoutstat {
+ struct nfs4_ff_io_stat io_stat;
+ struct nfs4_ff_busy_timer busy_timer;
+};
+
struct nfs4_ff_layout_mirror {
+ struct pnfs_layout_segment *lseg; /* back pointer */
u32 ds_count;
u32 efficiency;
struct nfs4_ff_layout_ds *mirror_ds;
u32 fh_versions_cnt;
struct nfs_fh *fh_versions;
nfs4_stateid stateid;
- struct nfs4_string user_name;
- struct nfs4_string group_name;
u32 uid;
u32 gid;
struct rpc_cred *cred;
spinlock_t lock;
+ struct nfs4_ff_layoutstat read_stat;
+ struct nfs4_ff_layoutstat write_stat;
+ ktime_t start_time;
+ ktime_t last_report_time;
};
struct nfs4_ff_layout_segment {
struct pnfs_layout_segment generic_hdr;
u64 stripe_unit;
+ u32 flags;
u32 mirror_array_cnt;
struct nfs4_ff_layout_mirror **mirror_array;
};
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 5d25b9d97..0adc7d245 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -442,8 +442,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
inode->i_version = fattr->change_attr;
- else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR))
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
+ else
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
+ | NFS_INO_REVAL_PAGECACHE);
if (fattr->valid & NFS_ATTR_FATTR_SIZE)
inode->i_size = nfs_size_to_loff_t(fattr->size);
else
@@ -678,6 +679,8 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
if (!err) {
generic_fillattr(inode, stat);
stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
+ if (S_ISDIR(inode->i_mode))
+ stat->blksize = NFS_SERVER(inode)->dtsize;
}
out:
trace_nfs_getattr_exit(inode, err);
@@ -1689,7 +1692,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfs_force_lookup_revalidate(inode);
inode->i_version = fattr->change_attr;
}
- } else if (server->caps & NFS_CAP_CHANGE_ATTR)
+ } else
nfsi->cache_validity |= save_cache_validity;
if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
@@ -2008,17 +2011,15 @@ static int __init init_nfs_fs(void)
if (err)
goto out1;
-#ifdef CONFIG_PROC_FS
rpc_proc_register(&init_net, &nfs_rpcstat);
-#endif
- if ((err = register_nfs_fs()) != 0)
+
+ err = register_nfs_fs();
+ if (err)
goto out0;
return 0;
out0:
-#ifdef CONFIG_PROC_FS
rpc_proc_unregister(&init_net, "nfs");
-#endif
nfs_destroy_directcache();
out1:
nfs_destroy_writepagecache();
@@ -2049,9 +2050,7 @@ static void __exit exit_nfs_fs(void)
nfs_destroy_nfspagecache();
nfs_fscache_unregister();
unregister_pernet_subsys(&nfs_net_ops);
-#ifdef CONFIG_PROC_FS
rpc_proc_unregister(&init_net, "nfs");
-#endif
unregister_nfs_fs();
nfs_fs_proc_exit();
nfsiod_stop();
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 9e6475bc5..9b372b845 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -296,6 +296,22 @@ extern struct rpc_procinfo nfs4_procedures[];
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
extern struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags);
+static inline struct nfs4_label *
+nfs4_label_copy(struct nfs4_label *dst, struct nfs4_label *src)
+{
+ if (!dst || !src)
+ return NULL;
+
+ if (src->len > NFS4_MAXLABELLEN)
+ return NULL;
+
+ dst->lfs = src->lfs;
+ dst->pi = src->pi;
+ dst->len = src->len;
+ memcpy(dst->label, src->label, src->len);
+
+ return dst;
+}
static inline void nfs4_label_free(struct nfs4_label *label)
{
if (label) {
@@ -316,6 +332,11 @@ static inline void nfs4_label_free(void *label) {}
static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi)
{
}
+static inline struct nfs4_label *
+nfs4_label_copy(struct nfs4_label *dst, struct nfs4_label *src)
+{
+ return NULL;
+}
#endif /* CONFIG_NFS_V4_SECURITY_LABEL */
/* proc.c */
@@ -607,7 +628,7 @@ void nfs_mark_page_unstable(struct page *page)
struct inode *inode = page_file_mapping(page)->host;
inc_zone_page_state(page, NR_UNSTABLE_NFS);
- inc_bdi_stat(inode_to_bdi(inode), BDI_RECLAIMABLE);
+ inc_wb_stat(&inode_to_bdi(inode)->wb, WB_RECLAIMABLE);
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
}
diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h
index 7afb8947d..ff66ae700 100644
--- a/fs/nfs/nfs42.h
+++ b/fs/nfs/nfs42.h
@@ -5,11 +5,18 @@
#ifndef __LINUX_FS_NFS_NFS4_2_H
#define __LINUX_FS_NFS_NFS4_2_H
+/*
+ * FIXME: four LAYOUTSTATS calls per compound at most! Do we need to support
+ * more? Need to consider not to pre-alloc too much for a compound.
+ */
+#define PNFS_LAYOUTSTATS_MAXDEV (4)
+
/* nfs4.2proc.c */
int nfs42_proc_allocate(struct file *, loff_t, loff_t);
int nfs42_proc_deallocate(struct file *, loff_t, loff_t);
loff_t nfs42_proc_llseek(struct file *, loff_t, int);
-
+int nfs42_proc_layoutstats_generic(struct nfs_server *,
+ struct nfs42_layoutstat_data *);
/* nfs4.2xdr.h */
extern struct rpc_procinfo nfs4_2_procedures[];
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 3a9e75235..d731bbf97 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -10,6 +10,11 @@
#include <linux/nfs_fs.h>
#include "nfs4_fs.h"
#include "nfs42.h"
+#include "iostat.h"
+#include "pnfs.h"
+#include "internal.h"
+
+#define NFSDBG_FACILITY NFSDBG_PNFS
static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file,
fmode_t fmode)
@@ -130,7 +135,7 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
return err;
}
-loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
+static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
{
struct inode *inode = file_inode(filep);
struct nfs42_seek_args args = {
@@ -165,3 +170,102 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
}
+
+loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
+{
+ struct nfs_server *server = NFS_SERVER(file_inode(filep));
+ struct nfs4_exception exception = { };
+ int err;
+
+ do {
+ err = _nfs42_proc_llseek(filep, offset, whence);
+ if (err == -ENOTSUPP)
+ return -EOPNOTSUPP;
+ err = nfs4_handle_exception(server, err, &exception);
+ } while (exception.retry);
+
+ return err;
+}
+
+
+static void
+nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata)
+{
+ struct nfs42_layoutstat_data *data = calldata;
+ struct nfs_server *server = NFS_SERVER(data->args.inode);
+
+ nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args,
+ &data->res.seq_res, task);
+}
+
+static void
+nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
+{
+ struct nfs42_layoutstat_data *data = calldata;
+
+ if (!nfs4_sequence_done(task, &data->res.seq_res))
+ return;
+
+ switch (task->tk_status) {
+ case 0:
+ break;
+ case -ENOTSUPP:
+ case -EOPNOTSUPP:
+ NFS_SERVER(data->inode)->caps &= ~NFS_CAP_LAYOUTSTATS;
+ default:
+ dprintk("%s server returns %d\n", __func__, task->tk_status);
+ }
+}
+
+static void
+nfs42_layoutstat_release(void *calldata)
+{
+ struct nfs42_layoutstat_data *data = calldata;
+ struct nfs_server *nfss = NFS_SERVER(data->args.inode);
+
+ if (nfss->pnfs_curr_ld->cleanup_layoutstats)
+ nfss->pnfs_curr_ld->cleanup_layoutstats(data);
+
+ pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout);
+ smp_mb__before_atomic();
+ clear_bit(NFS_INO_LAYOUTSTATS, &NFS_I(data->args.inode)->flags);
+ smp_mb__after_atomic();
+ nfs_iput_and_deactive(data->inode);
+ kfree(data->args.devinfo);
+ kfree(data);
+}
+
+static const struct rpc_call_ops nfs42_layoutstat_ops = {
+ .rpc_call_prepare = nfs42_layoutstat_prepare,
+ .rpc_call_done = nfs42_layoutstat_done,
+ .rpc_release = nfs42_layoutstat_release,
+};
+
+int nfs42_proc_layoutstats_generic(struct nfs_server *server,
+ struct nfs42_layoutstat_data *data)
+{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTSTATS],
+ .rpc_argp = &data->args,
+ .rpc_resp = &data->res,
+ };
+ struct rpc_task_setup task_setup = {
+ .rpc_client = server->client,
+ .rpc_message = &msg,
+ .callback_ops = &nfs42_layoutstat_ops,
+ .callback_data = data,
+ .flags = RPC_TASK_ASYNC,
+ };
+ struct rpc_task *task;
+
+ data->inode = nfs_igrab_and_active(data->args.inode);
+ if (!data->inode) {
+ nfs42_layoutstat_release(data);
+ return -EAGAIN;
+ }
+ nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
+ task = rpc_run_task(&task_setup);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ return 0;
+}
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 1a25b2724..a6bd27da6 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -4,6 +4,8 @@
#ifndef __LINUX_FS_NFS_NFS4_2XDR_H
#define __LINUX_FS_NFS_NFS4_2XDR_H
+#include "nfs42.h"
+
#define encode_fallocate_maxsz (encode_stateid_maxsz + \
2 /* offset */ + \
2 /* length */)
@@ -22,6 +24,16 @@
1 /* whence */ + \
2 /* offset */ + \
2 /* length */)
+#define encode_io_info_maxsz 4
+#define encode_layoutstats_maxsz (op_decode_hdr_maxsz + \
+ 2 /* offset */ + \
+ 2 /* length */ + \
+ encode_stateid_maxsz + \
+ encode_io_info_maxsz + \
+ encode_io_info_maxsz + \
+ 1 /* opaque devaddr4 length */ + \
+ XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE))
+#define decode_layoutstats_maxsz (op_decode_hdr_maxsz)
#define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
@@ -45,6 +57,14 @@
#define NFS4_dec_seek_sz (compound_decode_hdr_maxsz + \
decode_putfh_maxsz + \
decode_seek_maxsz)
+#define NFS4_enc_layoutstats_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ PNFS_LAYOUTSTATS_MAXDEV * encode_layoutstats_maxsz)
+#define NFS4_dec_layoutstats_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz)
static void encode_fallocate(struct xdr_stream *xdr,
@@ -81,6 +101,33 @@ static void encode_seek(struct xdr_stream *xdr,
encode_uint32(xdr, args->sa_what);
}
+static void encode_layoutstats(struct xdr_stream *xdr,
+ struct nfs42_layoutstat_args *args,
+ struct nfs42_layoutstat_devinfo *devinfo,
+ struct compound_hdr *hdr)
+{
+ __be32 *p;
+
+ encode_op_hdr(xdr, OP_LAYOUTSTATS, decode_layoutstats_maxsz, hdr);
+ p = reserve_space(xdr, 8 + 8);
+ p = xdr_encode_hyper(p, devinfo->offset);
+ p = xdr_encode_hyper(p, devinfo->length);
+ encode_nfs4_stateid(xdr, &args->stateid);
+ p = reserve_space(xdr, 4*8 + NFS4_DEVICEID4_SIZE + 4);
+ p = xdr_encode_hyper(p, devinfo->read_count);
+ p = xdr_encode_hyper(p, devinfo->read_bytes);
+ p = xdr_encode_hyper(p, devinfo->write_count);
+ p = xdr_encode_hyper(p, devinfo->write_bytes);
+ p = xdr_encode_opaque_fixed(p, devinfo->dev_id.data,
+ NFS4_DEVICEID4_SIZE);
+ /* Encode layoutupdate4 */
+ *p++ = cpu_to_be32(devinfo->layout_type);
+ if (devinfo->layoutstats_encode != NULL)
+ devinfo->layoutstats_encode(xdr, args, devinfo);
+ else
+ encode_uint32(xdr, 0);
+}
+
/*
* Encode ALLOCATE request
*/
@@ -137,6 +184,28 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req,
encode_nops(&hdr);
}
+/*
+ * Encode LAYOUTSTATS request
+ */
+static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs42_layoutstat_args *args)
+{
+ int i;
+
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ WARN_ON(args->num_dev > PNFS_LAYOUTSTATS_MAXDEV);
+ for (i = 0; i < args->num_dev; i++)
+ encode_layoutstats(xdr, args, &args->devinfo[i], &hdr);
+ encode_nops(&hdr);
+}
+
static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
{
return decode_op_hdr(xdr, OP_ALLOCATE);
@@ -169,6 +238,12 @@ out_overflow:
return -EIO;
}
+static int decode_layoutstats(struct xdr_stream *xdr,
+ struct nfs42_layoutstat_res *res)
+{
+ return decode_op_hdr(xdr, OP_LAYOUTSTATS);
+}
+
/*
* Decode ALLOCATE request
*/
@@ -246,4 +321,35 @@ static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp,
out:
return status;
}
+
+/*
+ * Decode LAYOUTSTATS request
+ */
+static int nfs4_xdr_dec_layoutstats(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs42_layoutstat_res *res)
+{
+ struct compound_hdr hdr;
+ int status, i;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ WARN_ON(res->num_dev > PNFS_LAYOUTSTATS_MAXDEV);
+ for (i = 0; i < res->num_dev; i++) {
+ status = decode_layoutstats(xdr, res);
+ if (status)
+ goto out;
+ }
+out:
+ res->rpc_status = status;
+ return status;
+}
+
#endif /* __LINUX_FS_NFS_NFS4_2XDR_H */
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index fdef424b0..ea3bee919 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -233,6 +233,7 @@ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception
extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *,
struct rpc_message *, struct nfs4_sequence_args *,
struct nfs4_sequence_res *, int);
+extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int);
extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index e42be52a8..3aa6a9ba5 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -676,7 +676,6 @@ found:
break;
}
- /* No matching nfs_client found. */
spin_unlock(&nn->nfs_client_lock);
dprintk("NFS: <-- %s status = %d\n", __func__, status);
nfs_put_client(prev);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index f58c17b3b..dcd39d4e2 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -41,6 +41,10 @@ nfs4_file_open(struct inode *inode, struct file *filp)
dprintk("NFS: open file(%pd2)\n", dentry);
+ err = nfs_check_flags(openflags);
+ if (err)
+ return err;
+
if ((openflags & O_ACCMODE) == 3)
openflags--;
diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c
index c0b3a16b4..039b3eb6d 100644
--- a/fs/nfs/nfs4getroot.c
+++ b/fs/nfs/nfs4getroot.c
@@ -35,13 +35,6 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_p
goto out;
}
- if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
- printk(KERN_ERR "nfs4_get_rootfh:"
- " getroot obtained referral\n");
- ret = -EREMOTE;
- goto out;
- }
-
memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid));
out:
nfs_free_fattr(fsinfo.fattr);
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 2e1737c40..535dfc69c 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -494,12 +494,7 @@ nfs_idmap_delete(struct nfs_client *clp)
int nfs_idmap_init(void)
{
- int ret;
- ret = nfs_idmap_init_keyring();
- if (ret != 0)
- goto out;
-out:
- return ret;
+ return nfs_idmap_init_keyring();
}
void nfs_idmap_quit(void)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d3f205126..3acb1eb72 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -356,6 +356,9 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_
case 0:
return 0;
case -NFS4ERR_OPENMODE:
+ case -NFS4ERR_DELEG_REVOKED:
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_BAD_STATEID:
if (inode && nfs4_have_delegation(inode, FMODE_READ)) {
nfs4_inode_return_delegation(inode);
exception->retry = 1;
@@ -367,15 +370,6 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_
if (ret < 0)
break;
goto wait_on_recovery;
- case -NFS4ERR_DELEG_REVOKED:
- case -NFS4ERR_ADMIN_REVOKED:
- case -NFS4ERR_BAD_STATEID:
- if (state == NULL)
- break;
- ret = nfs4_schedule_stateid_recovery(server, state);
- if (ret < 0)
- break;
- goto wait_on_recovery;
case -NFS4ERR_EXPIRED:
if (state != NULL) {
ret = nfs4_schedule_stateid_recovery(server, state);
@@ -473,7 +467,10 @@ static void do_renew_lease(struct nfs_client *clp, unsigned long timestamp)
static void renew_lease(const struct nfs_server *server, unsigned long timestamp)
{
- do_renew_lease(server->nfs_client, timestamp);
+ struct nfs_client *clp = server->nfs_client;
+
+ if (!nfs4_has_session(clp))
+ do_renew_lease(clp, timestamp);
}
struct nfs4_call_sync_data {
@@ -482,8 +479,8 @@ struct nfs4_call_sync_data {
struct nfs4_sequence_res *seq_res;
};
-static void nfs4_init_sequence(struct nfs4_sequence_args *args,
- struct nfs4_sequence_res *res, int cache_reply)
+void nfs4_init_sequence(struct nfs4_sequence_args *args,
+ struct nfs4_sequence_res *res, int cache_reply)
{
args->sa_slot = NULL;
args->sa_cache_this = cache_reply;
@@ -622,8 +619,7 @@ int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
clp = session->clp;
do_renew_lease(clp, res->sr_timestamp);
/* Check sequence flags */
- if (res->sr_status_flags != 0)
- nfs4_schedule_lease_recovery(clp);
+ nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
nfs41_update_target_slotid(slot->table, slot, res);
break;
case 1:
@@ -916,6 +912,7 @@ struct nfs4_opendata {
struct nfs_open_confirmres c_res;
struct nfs4_string owner_name;
struct nfs4_string group_name;
+ struct nfs4_label *a_label;
struct nfs_fattr f_attr;
struct nfs4_label *f_label;
struct dentry *dir;
@@ -1019,6 +1016,10 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
if (IS_ERR(p->f_label))
goto err_free_p;
+ p->a_label = nfs4_label_alloc(server, gfp_mask);
+ if (IS_ERR(p->a_label))
+ goto err_free_f;
+
alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid;
p->o_arg.seqid = alloc_seqid(&sp->so_seqid, gfp_mask);
if (IS_ERR(p->o_arg.seqid))
@@ -1047,7 +1048,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
p->o_arg.server = server;
p->o_arg.bitmask = nfs4_bitmask(server, label);
p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0];
- p->o_arg.label = label;
+ p->o_arg.label = nfs4_label_copy(p->a_label, label);
p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim);
switch (p->o_arg.claim) {
case NFS4_OPEN_CLAIM_NULL:
@@ -1080,6 +1081,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
return p;
err_free_label:
+ nfs4_label_free(p->a_label);
+err_free_f:
nfs4_label_free(p->f_label);
err_free_p:
kfree(p);
@@ -1099,6 +1102,7 @@ static void nfs4_opendata_free(struct kref *kref)
nfs4_put_open_state(p->state);
nfs4_put_state_owner(p->owner);
+ nfs4_label_free(p->a_label);
nfs4_label_free(p->f_label);
dput(p->dir);
@@ -1556,6 +1560,13 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod
struct nfs4_state *newstate;
int ret;
+ if ((opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR ||
+ opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEG_CUR_FH) &&
+ (opendata->o_arg.u.delegation_type & fmode) != fmode)
+ /* This mode can't have been delegated, so we must have
+ * a valid open_stateid to cover it - not need to reclaim.
+ */
+ return 0;
opendata->o_arg.open_flags = 0;
opendata->o_arg.fmode = fmode;
opendata->o_arg.share_access = nfs4_map_atomic_open_share(
@@ -1687,6 +1698,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
"%d.\n", __func__, err);
case 0:
case -ENOENT:
+ case -EAGAIN:
case -ESTALE:
break;
case -NFS4ERR_BADSESSION:
@@ -3358,6 +3370,8 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
goto out;
case -NFS4ERR_MOVED:
err = nfs4_get_referral(client, dir, name, fattr, fhandle);
+ if (err == -NFS4ERR_MOVED)
+ err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception);
goto out;
case -NFS4ERR_WRONGSEC:
err = -EPERM;
@@ -4958,49 +4972,128 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp,
memcpy(bootverf->data, verf, sizeof(bootverf->data));
}
-static unsigned int
-nfs4_init_nonuniform_client_string(struct nfs_client *clp,
- char *buf, size_t len)
+static int
+nfs4_init_nonuniform_client_string(struct nfs_client *clp)
{
- unsigned int result;
+ int result;
+ size_t len;
+ char *str;
+ bool retried = false;
if (clp->cl_owner_id != NULL)
- return strlcpy(buf, clp->cl_owner_id, len);
+ return 0;
+retry:
+ rcu_read_lock();
+ len = 10 + strlen(clp->cl_ipaddr) + 1 +
+ strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)) +
+ 1 +
+ strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)) +
+ 1;
+ rcu_read_unlock();
+
+ if (len > NFS4_OPAQUE_LIMIT + 1)
+ return -EINVAL;
+
+ /*
+ * Since this string is allocated at mount time, and held until the
+ * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying
+ * about a memory-reclaim deadlock.
+ */
+ str = kmalloc(len, GFP_KERNEL);
+ if (!str)
+ return -ENOMEM;
rcu_read_lock();
- result = scnprintf(buf, len, "Linux NFSv4.0 %s/%s %s",
- clp->cl_ipaddr,
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_ADDR),
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_PROTO));
+ result = scnprintf(str, len, "Linux NFSv4.0 %s/%s %s",
+ clp->cl_ipaddr,
+ rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
+ rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO));
rcu_read_unlock();
- clp->cl_owner_id = kstrdup(buf, GFP_KERNEL);
- return result;
+
+ /* Did something change? */
+ if (result >= len) {
+ kfree(str);
+ if (retried)
+ return -EINVAL;
+ retried = true;
+ goto retry;
+ }
+ clp->cl_owner_id = str;
+ return 0;
}
-static unsigned int
-nfs4_init_uniform_client_string(struct nfs_client *clp,
- char *buf, size_t len)
+static int
+nfs4_init_uniquifier_client_string(struct nfs_client *clp)
+{
+ int result;
+ size_t len;
+ char *str;
+
+ len = 10 + 10 + 1 + 10 + 1 +
+ strlen(nfs4_client_id_uniquifier) + 1 +
+ strlen(clp->cl_rpcclient->cl_nodename) + 1;
+
+ if (len > NFS4_OPAQUE_LIMIT + 1)
+ return -EINVAL;
+
+ /*
+ * Since this string is allocated at mount time, and held until the
+ * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying
+ * about a memory-reclaim deadlock.
+ */
+ str = kmalloc(len, GFP_KERNEL);
+ if (!str)
+ return -ENOMEM;
+
+ result = scnprintf(str, len, "Linux NFSv%u.%u %s/%s",
+ clp->rpc_ops->version, clp->cl_minorversion,
+ nfs4_client_id_uniquifier,
+ clp->cl_rpcclient->cl_nodename);
+ if (result >= len) {
+ kfree(str);
+ return -EINVAL;
+ }
+ clp->cl_owner_id = str;
+ return 0;
+}
+
+static int
+nfs4_init_uniform_client_string(struct nfs_client *clp)
{
- const char *nodename = clp->cl_rpcclient->cl_nodename;
- unsigned int result;
+ int result;
+ size_t len;
+ char *str;
if (clp->cl_owner_id != NULL)
- return strlcpy(buf, clp->cl_owner_id, len);
+ return 0;
if (nfs4_client_id_uniquifier[0] != '\0')
- result = scnprintf(buf, len, "Linux NFSv%u.%u %s/%s",
- clp->rpc_ops->version,
- clp->cl_minorversion,
- nfs4_client_id_uniquifier,
- nodename);
- else
- result = scnprintf(buf, len, "Linux NFSv%u.%u %s",
- clp->rpc_ops->version, clp->cl_minorversion,
- nodename);
- clp->cl_owner_id = kstrdup(buf, GFP_KERNEL);
- return result;
+ return nfs4_init_uniquifier_client_string(clp);
+
+ len = 10 + 10 + 1 + 10 + 1 +
+ strlen(clp->cl_rpcclient->cl_nodename) + 1;
+
+ if (len > NFS4_OPAQUE_LIMIT + 1)
+ return -EINVAL;
+
+ /*
+ * Since this string is allocated at mount time, and held until the
+ * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying
+ * about a memory-reclaim deadlock.
+ */
+ str = kmalloc(len, GFP_KERNEL);
+ if (!str)
+ return -ENOMEM;
+
+ result = scnprintf(str, len, "Linux NFSv%u.%u %s",
+ clp->rpc_ops->version, clp->cl_minorversion,
+ clp->cl_rpcclient->cl_nodename);
+ if (result >= len) {
+ kfree(str);
+ return -EINVAL;
+ }
+ clp->cl_owner_id = str;
+ return 0;
}
/*
@@ -5047,7 +5140,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
struct nfs4_setclientid setclientid = {
.sc_verifier = &sc_verifier,
.sc_prog = program,
- .sc_cb_ident = clp->cl_cb_ident,
+ .sc_clnt = clp,
};
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID],
@@ -5067,16 +5160,15 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
/* nfs_client_id4 */
nfs4_init_boot_verifier(clp, &sc_verifier);
+
if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags))
- setclientid.sc_name_len =
- nfs4_init_uniform_client_string(clp,
- setclientid.sc_name,
- sizeof(setclientid.sc_name));
+ status = nfs4_init_uniform_client_string(clp);
else
- setclientid.sc_name_len =
- nfs4_init_nonuniform_client_string(clp,
- setclientid.sc_name,
- sizeof(setclientid.sc_name));
+ status = nfs4_init_nonuniform_client_string(clp);
+
+ if (status)
+ goto out;
+
/* cb_client4 */
setclientid.sc_netid_len =
nfs4_init_callback_netid(clp,
@@ -5086,9 +5178,9 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
sizeof(setclientid.sc_uaddr), "%s.%u.%u",
clp->cl_ipaddr, port >> 8, port & 255);
- dprintk("NFS call setclientid auth=%s, '%.*s'\n",
+ dprintk("NFS call setclientid auth=%s, '%s'\n",
clp->cl_rpcclient->cl_auth->au_ops->au_name,
- setclientid.sc_name_len, setclientid.sc_name);
+ clp->cl_owner_id);
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task)) {
status = PTR_ERR(task);
@@ -5360,15 +5452,15 @@ static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *
return err;
}
-static int do_vfs_lock(struct file *file, struct file_lock *fl)
+static int do_vfs_lock(struct inode *inode, struct file_lock *fl)
{
int res = 0;
switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
case FL_POSIX:
- res = posix_lock_file_wait(file, fl);
+ res = posix_lock_inode_wait(inode, fl);
break;
case FL_FLOCK:
- res = flock_lock_file_wait(file, fl);
+ res = flock_lock_inode_wait(inode, fl);
break;
default:
BUG();
@@ -5428,7 +5520,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
switch (task->tk_status) {
case 0:
renew_lease(calldata->server, calldata->timestamp);
- do_vfs_lock(calldata->fl.fl_file, &calldata->fl);
+ do_vfs_lock(calldata->lsp->ls_state->inode, &calldata->fl);
if (nfs4_update_lock_stateid(calldata->lsp,
&calldata->res.stateid))
break;
@@ -5536,7 +5628,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
mutex_lock(&sp->so_delegreturn_mutex);
/* Exclude nfs4_reclaim_open_stateid() - note nesting! */
down_read(&nfsi->rwsem);
- if (do_vfs_lock(request->fl_file, request) == -ENOENT) {
+ if (do_vfs_lock(inode, request) == -ENOENT) {
up_read(&nfsi->rwsem);
mutex_unlock(&sp->so_delegreturn_mutex);
goto out;
@@ -5677,7 +5769,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
data->timestamp);
if (data->arg.new_lock) {
data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
- if (do_vfs_lock(data->fl.fl_file, &data->fl) < 0) {
+ if (do_vfs_lock(lsp->ls_state->inode, &data->fl) < 0) {
rpc_restart_call_prepare(task);
break;
}
@@ -5919,7 +6011,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
if (status != 0)
goto out;
request->fl_flags |= FL_ACCESS;
- status = do_vfs_lock(request->fl_file, request);
+ status = do_vfs_lock(state->inode, request);
if (status < 0)
goto out;
down_read(&nfsi->rwsem);
@@ -5927,7 +6019,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
/* Yes: cache locks! */
/* ...but avoid races with delegation recall... */
request->fl_flags = fl_flags & ~FL_SLEEP;
- status = do_vfs_lock(request->fl_file, request);
+ status = do_vfs_lock(state->inode, request);
up_read(&nfsi->rwsem);
goto out;
}
@@ -6849,11 +6941,14 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
};
nfs4_init_boot_verifier(clp, &verifier);
- args.id_len = nfs4_init_uniform_client_string(clp, args.id,
- sizeof(args.id));
- dprintk("NFS call exchange_id auth=%s, '%.*s'\n",
+
+ status = nfs4_init_uniform_client_string(clp);
+ if (status)
+ goto out;
+
+ dprintk("NFS call exchange_id auth=%s, '%s'\n",
clp->cl_rpcclient->cl_auth->au_ops->au_name,
- args.id_len, args.id);
+ clp->cl_owner_id);
res.server_owner = kzalloc(sizeof(struct nfs41_server_owner),
GFP_NOFS);
@@ -6888,7 +6983,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
/* unsupported! */
WARN_ON_ONCE(1);
status = -EINVAL;
- goto out_server_scope;
+ goto out_impl_id;
}
status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
@@ -6916,6 +7011,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
/* use the most recent implementation id */
kfree(clp->cl_implid);
clp->cl_implid = res.impl_id;
+ res.impl_id = NULL;
if (clp->cl_serverscope != NULL &&
!nfs41_same_server_scope(clp->cl_serverscope,
@@ -6929,15 +7025,16 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
if (clp->cl_serverscope == NULL) {
clp->cl_serverscope = res.server_scope;
- goto out;
+ res.server_scope = NULL;
}
- } else
- kfree(res.impl_id);
+ }
-out_server_owner:
- kfree(res.server_owner);
+out_impl_id:
+ kfree(res.impl_id);
out_server_scope:
kfree(res.server_scope);
+out_server_owner:
+ kfree(res.server_owner);
out:
if (clp->cl_implid != NULL)
dprintk("NFS reply exchange_id: Server Implementation ID: "
@@ -7487,13 +7584,8 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
goto out;
}
ret = rpc_wait_for_completion_task(task);
- if (!ret) {
- struct nfs4_sequence_res *res = task->tk_msg.rpc_resp;
-
- if (task->tk_status == 0)
- nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
+ if (!ret)
ret = task->tk_status;
- }
rpc_put_task(task);
out:
dprintk("<-- %s status=%d\n", __func__, ret);
@@ -7881,16 +7973,17 @@ static void nfs4_layoutreturn_release(void *calldata)
{
struct nfs4_layoutreturn *lrp = calldata;
struct pnfs_layout_hdr *lo = lrp->args.layout;
+ LIST_HEAD(freeme);
dprintk("--> %s\n", __func__);
spin_lock(&lo->plh_inode->i_lock);
if (lrp->res.lrs_present)
pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
+ pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range);
pnfs_clear_layoutreturn_waitbit(lo);
- clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags);
- rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
lo->plh_block_lgets--;
spin_unlock(&lo->plh_inode->i_lock);
+ pnfs_free_lseg_list(&freeme);
pnfs_put_layout_hdr(lrp->args.layout);
nfs_iput_and_deactive(lrp->inode);
kfree(calldata);
@@ -8064,9 +8157,8 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
struct rpc_task *task;
int status = 0;
- dprintk("NFS: %4d initiating layoutcommit call. sync %d "
- "lbw: %llu inode %lu\n",
- data->task.tk_pid, sync,
+ dprintk("NFS: initiating layoutcommit call. sync %d "
+ "lbw: %llu inode %lu\n", sync,
data->args.lastbytewritten,
data->args.inode->i_ino);
@@ -8505,7 +8597,6 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
.minor_version = 0,
.init_caps = NFS_CAP_READDIRPLUS
| NFS_CAP_ATOMIC_OPEN
- | NFS_CAP_CHANGE_ATTR
| NFS_CAP_POSIX_LOCK,
.init_client = nfs40_init_client,
.shutdown_client = nfs40_shutdown_client,
@@ -8531,7 +8622,6 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
.minor_version = 1,
.init_caps = NFS_CAP_READDIRPLUS
| NFS_CAP_ATOMIC_OPEN
- | NFS_CAP_CHANGE_ATTR
| NFS_CAP_POSIX_LOCK
| NFS_CAP_STATEID_NFSV41
| NFS_CAP_ATOMIC_OPEN_V1,
@@ -8554,13 +8644,13 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
.minor_version = 2,
.init_caps = NFS_CAP_READDIRPLUS
| NFS_CAP_ATOMIC_OPEN
- | NFS_CAP_CHANGE_ATTR
| NFS_CAP_POSIX_LOCK
| NFS_CAP_STATEID_NFSV41
| NFS_CAP_ATOMIC_OPEN_V1
| NFS_CAP_ALLOCATE
| NFS_CAP_DEALLOCATE
- | NFS_CAP_SEEK,
+ | NFS_CAP_SEEK
+ | NFS_CAP_LAYOUTSTATS,
.init_client = nfs41_init_client,
.shutdown_client = nfs41_shutdown_client,
.match_stateid = nfs41_match_stateid,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index ddef1dc80..f2e2ad894 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -309,7 +309,6 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
goto do_confirm;
- nfs4_begin_drain_session(clp);
status = nfs4_proc_exchange_id(clp, cred);
if (status != 0)
goto out;
@@ -1832,6 +1831,7 @@ static int nfs4_establish_lease(struct nfs_client *clp)
clp->cl_mvops->reboot_recovery_ops;
int status;
+ nfs4_begin_drain_session(clp);
cred = nfs4_get_clid_cred(clp);
if (cred == NULL)
return -ENOENT;
@@ -2191,25 +2191,35 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)
}
}
-static void nfs41_handle_state_revoked(struct nfs_client *clp)
+static void nfs41_handle_all_state_revoked(struct nfs_client *clp)
{
nfs4_reset_all_state(clp);
dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname);
}
+static void nfs41_handle_some_state_revoked(struct nfs_client *clp)
+{
+ nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce);
+ nfs4_schedule_state_manager(clp);
+
+ dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname);
+}
+
static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp)
{
- /* This will need to handle layouts too */
- nfs_expire_all_delegations(clp);
+ /* FIXME: For now, we destroy all layouts. */
+ pnfs_destroy_all_layouts(clp);
+ /* FIXME: For now, we test all delegations+open state+locks. */
+ nfs41_handle_some_state_revoked(clp);
dprintk("%s: Recallable state revoked on server %s!\n", __func__,
clp->cl_hostname);
}
static void nfs41_handle_backchannel_fault(struct nfs_client *clp)
{
- nfs_expire_all_delegations(clp);
- if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
- nfs4_schedule_state_manager(clp);
+ set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
+ nfs4_schedule_state_manager(clp);
+
dprintk("%s: server %s declared a backchannel fault\n", __func__,
clp->cl_hostname);
}
@@ -2231,10 +2241,11 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)
nfs41_handle_server_reboot(clp);
- if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
- SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED |
+ if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED))
+ nfs41_handle_all_state_revoked(clp);
+ if (flags & (SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED |
SEQ4_STATUS_ADMIN_STATE_REVOKED))
- nfs41_handle_state_revoked(clp);
+ nfs41_handle_some_state_revoked(clp);
if (flags & SEQ4_STATUS_LEASE_MOVED)
nfs4_schedule_lease_moved_recovery(clp);
if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0aea97841..558cd65db 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -139,7 +139,8 @@ static int nfs4_stat_to_errno(int);
#define encode_setclientid_maxsz \
(op_encode_hdr_maxsz + \
XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \
- XDR_QUADLEN(NFS4_SETCLIENTID_NAMELEN) + \
+ /* client name */ \
+ 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
1 /* sc_prog */ + \
1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \
1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \
@@ -288,7 +289,8 @@ static int nfs4_stat_to_errno(int);
#define encode_exchange_id_maxsz (op_encode_hdr_maxsz + \
encode_verifier_maxsz + \
1 /* co_ownerid.len */ + \
- XDR_QUADLEN(NFS4_EXCHANGE_ID_LEN) + \
+ /* eia_clientowner */ \
+ 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
1 /* flags */ + \
1 /* spa_how */ + \
/* max is SP4_MACH_CRED (for now) */ + \
@@ -1667,13 +1669,14 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
encode_op_hdr(xdr, OP_SETCLIENTID, decode_setclientid_maxsz, hdr);
encode_nfs4_verifier(xdr, setclientid->sc_verifier);
- encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name);
+ encode_string(xdr, strlen(setclientid->sc_clnt->cl_owner_id),
+ setclientid->sc_clnt->cl_owner_id);
p = reserve_space(xdr, 4);
*p = cpu_to_be32(setclientid->sc_prog);
encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid);
encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr);
p = reserve_space(xdr, 4);
- *p = cpu_to_be32(setclientid->sc_cb_ident);
+ *p = cpu_to_be32(setclientid->sc_clnt->cl_cb_ident);
}
static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr)
@@ -1747,7 +1750,8 @@ static void encode_exchange_id(struct xdr_stream *xdr,
encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr);
encode_nfs4_verifier(xdr, args->verifier);
- encode_string(xdr, args->id_len, args->id);
+ encode_string(xdr, strlen(args->client->cl_owner_id),
+ args->client->cl_owner_id);
encode_uint32(xdr, args->flags);
encode_uint32(xdr, args->state_protect.how);
@@ -7427,6 +7431,7 @@ struct rpc_procinfo nfs4_procedures[] = {
PROC(SEEK, enc_seek, dec_seek),
PROC(ALLOCATE, enc_allocate, dec_allocate),
PROC(DEALLOCATE, enc_deallocate, dec_deallocate),
+ PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats),
#endif /* CONFIG_NFS_V4_2 */
};
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 7b4552678..4984bbe55 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -636,9 +636,8 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how);
- dprintk("NFS: %5u initiated pgio call "
+ dprintk("NFS: initiated pgio call "
"(req %s/%llu, %u bytes @ offset %llu)\n",
- hdr->task.tk_pid,
hdr->inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(hdr->inode),
hdr->args.count,
@@ -690,8 +689,6 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
static void nfs_pgio_release(void *calldata)
{
struct nfs_pgio_header *hdr = calldata;
- if (hdr->rw_ops->rw_release)
- hdr->rw_ops->rw_release(hdr);
nfs_pgio_data_destroy(hdr);
hdr->completion_ops->completion(hdr);
}
@@ -711,7 +708,9 @@ static void nfs_pageio_mirror_init(struct nfs_pgio_mirror *mirror,
* nfs_pageio_init - initialise a page io descriptor
* @desc: pointer to descriptor
* @inode: pointer to inode
- * @doio: pointer to io function
+ * @pg_ops: pointer to pageio operations
+ * @compl_ops: pointer to pageio completion operations
+ * @rw_ops: pointer to nfs read/write operations
* @bsize: io block size
* @io_flags: extra parameters for the io function
*/
@@ -1101,8 +1100,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
mirror->pg_base = 0;
mirror->pg_recoalesce = 0;
- desc->pg_moreio = 0;
-
while (!list_empty(&head)) {
struct nfs_page *req;
@@ -1189,6 +1186,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
* nfs_pageio_complete_mirror - Complete I/O on the current mirror of an
* nfs_pageio_descriptor
* @desc: pointer to io descriptor
+ * @mirror_idx: pointer to mirror index
*/
static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc,
u32 mirror_idx)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index d47c18868..70bf706b1 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -35,6 +35,7 @@
#include "iostat.h"
#include "nfs4trace.h"
#include "delegation.h"
+#include "nfs42.h"
#define NFSDBG_FACILITY NFSDBG_PNFS
#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)
@@ -351,7 +352,7 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
{
struct pnfs_layout_segment *s;
- if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
+ if (!test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
return false;
list_for_each_entry(s, &lo->plh_segs, pls_list)
@@ -361,6 +362,18 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
return true;
}
+static bool
+pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
+{
+ if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+ return false;
+ lo->plh_return_iomode = 0;
+ lo->plh_block_lgets++;
+ pnfs_get_layout_hdr(lo);
+ clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags);
+ return true;
+}
+
static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
struct pnfs_layout_hdr *lo, struct inode *inode)
{
@@ -371,17 +384,16 @@ static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
if (pnfs_layout_need_return(lo, lseg)) {
nfs4_stateid stateid;
enum pnfs_iomode iomode;
+ bool send;
stateid = lo->plh_stateid;
iomode = lo->plh_return_iomode;
- /* decreased in pnfs_send_layoutreturn() */
- lo->plh_block_lgets++;
- lo->plh_return_iomode = 0;
+ send = pnfs_prepare_layoutreturn(lo);
spin_unlock(&inode->i_lock);
- pnfs_get_layout_hdr(lo);
-
- /* Send an async layoutreturn so we dont deadlock */
- pnfs_send_layoutreturn(lo, stateid, iomode, false);
+ if (send) {
+ /* Send an async layoutreturn so we dont deadlock */
+ pnfs_send_layoutreturn(lo, stateid, iomode, false);
+ }
} else
spin_unlock(&inode->i_lock);
}
@@ -410,6 +422,10 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
pnfs_layoutreturn_before_put_lseg(lseg, lo, inode);
if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
+ if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
+ spin_unlock(&inode->i_lock);
+ return;
+ }
pnfs_get_layout_hdr(lo);
pnfs_layout_remove_lseg(lo, lseg);
spin_unlock(&inode->i_lock);
@@ -450,6 +466,8 @@ pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg)
test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
if (atomic_dec_and_test(&lseg->pls_refcount)) {
struct pnfs_layout_hdr *lo = lseg->pls_layout;
+ if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags))
+ return;
pnfs_get_layout_hdr(lo);
pnfs_layout_remove_lseg(lo, lseg);
pnfs_free_lseg_async(lseg);
@@ -923,6 +941,7 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags);
smp_mb__after_atomic();
wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
+ rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
}
static int
@@ -977,6 +996,7 @@ _pnfs_return_layout(struct inode *ino)
LIST_HEAD(tmp_list);
nfs4_stateid stateid;
int status = 0, empty;
+ bool send;
dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino);
@@ -1006,17 +1026,18 @@ _pnfs_return_layout(struct inode *ino)
/* Don't send a LAYOUTRETURN if list was initially empty */
if (empty) {
spin_unlock(&ino->i_lock);
- pnfs_put_layout_hdr(lo);
dprintk("NFS: %s no layout segments to return\n", __func__);
- goto out;
+ goto out_put_layout_hdr;
}
set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
- lo->plh_block_lgets++;
+ send = pnfs_prepare_layoutreturn(lo);
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&tmp_list);
-
- status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
+ if (send)
+ status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
+out_put_layout_hdr:
+ pnfs_put_layout_hdr(lo);
out:
dprintk("<-- %s status: %d\n", __func__, status);
return status;
@@ -1096,13 +1117,9 @@ bool pnfs_roc(struct inode *ino)
out_noroc:
if (lo) {
stateid = lo->plh_stateid;
- layoutreturn =
- test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
- &lo->plh_flags);
- if (layoutreturn) {
- lo->plh_block_lgets++;
- pnfs_get_layout_hdr(lo);
- }
+ if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+ &lo->plh_flags))
+ layoutreturn = pnfs_prepare_layoutreturn(lo);
}
spin_unlock(&ino->i_lock);
if (layoutreturn) {
@@ -1145,15 +1162,18 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task)
struct pnfs_layout_segment *lseg;
nfs4_stateid stateid;
u32 current_seqid;
- bool found = false, layoutreturn = false;
+ bool layoutreturn = false;
spin_lock(&ino->i_lock);
- list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
- if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
- rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
- found = true;
- goto out;
- }
+ list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) {
+ if (!test_bit(NFS_LSEG_ROC, &lseg->pls_flags))
+ continue;
+ if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags))
+ continue;
+ rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
+ spin_unlock(&ino->i_lock);
+ return true;
+ }
lo = nfsi->layout;
current_seqid = be32_to_cpu(lo->plh_stateid.seqid);
@@ -1161,23 +1181,19 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task)
* a barrier, we choose the worst-case barrier.
*/
*barrier = current_seqid + atomic_read(&lo->plh_outstanding);
-out:
- if (!found) {
- stateid = lo->plh_stateid;
- layoutreturn =
- test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
- &lo->plh_flags);
- if (layoutreturn) {
- lo->plh_block_lgets++;
- pnfs_get_layout_hdr(lo);
- }
- }
+ stateid = lo->plh_stateid;
+ if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+ &lo->plh_flags))
+ layoutreturn = pnfs_prepare_layoutreturn(lo);
+ if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+ rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
+
spin_unlock(&ino->i_lock);
if (layoutreturn) {
- rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false);
+ return true;
}
- return found;
+ return false;
}
/*
@@ -1694,7 +1710,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
spin_lock(&inode->i_lock);
/* set failure bit so that pnfs path will be retried later */
pnfs_layout_set_fail_bit(lo, iomode);
- set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
if (lo->plh_return_iomode == 0)
lo->plh_return_iomode = range.iomode;
else if (lo->plh_return_iomode != range.iomode)
@@ -2206,13 +2221,12 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
if (ld->prepare_layoutcommit) {
status = ld->prepare_layoutcommit(&data->args);
if (status) {
+ put_rpccred(data->cred);
spin_lock(&inode->i_lock);
set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
if (end_pos > nfsi->layout->plh_lwb)
nfsi->layout->plh_lwb = end_pos;
- spin_unlock(&inode->i_lock);
- put_rpccred(data->cred);
- goto clear_layoutcommitting;
+ goto out_unlock;
}
}
@@ -2250,3 +2264,63 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
}
return thp;
}
+
+#if IS_ENABLED(CONFIG_NFS_V4_2)
+int
+pnfs_report_layoutstat(struct inode *inode)
+{
+ struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs42_layoutstat_data *data;
+ struct pnfs_layout_hdr *hdr;
+ int status = 0;
+
+ if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats)
+ goto out;
+
+ if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS))
+ goto out;
+
+ if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags))
+ goto out;
+
+ spin_lock(&inode->i_lock);
+ if (!NFS_I(inode)->layout) {
+ spin_unlock(&inode->i_lock);
+ goto out;
+ }
+ hdr = NFS_I(inode)->layout;
+ pnfs_get_layout_hdr(hdr);
+ spin_unlock(&inode->i_lock);
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data) {
+ status = -ENOMEM;
+ goto out_put;
+ }
+
+ data->args.fh = NFS_FH(inode);
+ data->args.inode = inode;
+ nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid);
+ status = ld->prepare_layoutstats(&data->args);
+ if (status)
+ goto out_free;
+
+ status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data);
+
+out:
+ dprintk("%s returns %d\n", __func__, status);
+ return status;
+
+out_free:
+ kfree(data);
+out_put:
+ pnfs_put_layout_hdr(hdr);
+ smp_mb__before_atomic();
+ clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags);
+ smp_mb__after_atomic();
+ goto out;
+}
+EXPORT_SYMBOL_GPL(pnfs_report_layoutstat);
+#endif
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 1e6308f82..3e6ab7bfb 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -178,6 +178,8 @@ struct pnfs_layoutdriver_type {
void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo,
struct xdr_stream *xdr,
const struct nfs4_layoutcommit_args *args);
+ int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args);
+ void (*cleanup_layoutstats) (struct nfs42_layoutstat_data *data);
};
struct pnfs_layout_hdr {
@@ -290,7 +292,6 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *);
struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
void pnfs_error_mark_layout_for_return(struct inode *inode,
struct pnfs_layout_segment *lseg);
-
/* nfs4_deviceid_flags */
enum {
NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */
@@ -689,4 +690,14 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void)
#endif /* CONFIG_NFS_V4_1 */
+#if IS_ENABLED(CONFIG_NFS_V4_2)
+int pnfs_report_layoutstat(struct inode *inode);
+#else
+static inline int
+pnfs_report_layoutstat(struct inode *inode)
+{
+ return 0;
+}
+#endif
+
#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f175b833b..aa62004f1 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2847,7 +2847,7 @@ static int param_set_portnr(const char *val, const struct kernel_param *kp)
*((unsigned int *)kp->arg) = num;
return 0;
}
-static struct kernel_param_ops param_ops_portnr = {
+static const struct kernel_param_ops param_ops_portnr = {
.set = param_set_portnr,
.get = param_get_uint,
};
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 2d5620065..b6de433da 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -20,7 +20,6 @@
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/string.h>
-#include <linux/namei.h>
/* Symlink caching in the page cache is even more simplistic
* and straight-forward than readdir caching.
@@ -43,7 +42,7 @@ error:
return -EIO;
}
-static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *nfs_follow_link(struct dentry *dentry, void **cookie)
{
struct inode *inode = d_inode(dentry);
struct page *page;
@@ -51,19 +50,13 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
if (err)
- goto read_failed;
+ return err;
page = read_cache_page(&inode->i_data, 0,
(filler_t *)nfs_symlink_filler, inode);
- if (IS_ERR(page)) {
- err = page;
- goto read_failed;
- }
- nd_set_link(nd, kmap(page));
- return page;
-
-read_failed:
- nd_set_link(nd, err);
- return NULL;
+ if (IS_ERR(page))
+ return ERR_CAST(page);
+ *cookie = page;
+ return kmap(page);
}
/*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index daf355642..75a35a1af 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -853,7 +853,8 @@ static void
nfs_clear_page_commit(struct page *page)
{
dec_zone_page_state(page, NR_UNSTABLE_NFS);
- dec_bdi_stat(inode_to_bdi(page_file_mapping(page)->host), BDI_RECLAIMABLE);
+ dec_wb_stat(&inode_to_bdi(page_file_mapping(page)->host)->wb,
+ WB_RECLAIMABLE);
}
/* Called holding inode (/cinfo) lock */
@@ -1348,11 +1349,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}
-static void nfs_writeback_release_common(struct nfs_pgio_header *hdr)
-{
- /* do nothing! */
-}
-
/*
* Special version of should_remove_suid() that ignores capabilities.
*/
@@ -1383,24 +1379,27 @@ static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr,
{
struct nfs_pgio_args *argp = &hdr->args;
struct nfs_pgio_res *resp = &hdr->res;
+ u64 size = argp->offset + resp->count;
if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
+ fattr->size = size;
+ if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode)) {
+ fattr->valid &= ~NFS_ATTR_FATTR_SIZE;
return;
- if (argp->offset + resp->count != fattr->size)
- return;
- if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode))
+ }
+ if (size != fattr->size)
return;
/* Set attribute barrier */
nfs_fattr_set_barrier(fattr);
+ /* ...and update size */
+ fattr->valid |= NFS_ATTR_FATTR_SIZE;
}
void nfs_writeback_update_inode(struct nfs_pgio_header *hdr)
{
- struct nfs_fattr *fattr = hdr->res.fattr;
+ struct nfs_fattr *fattr = &hdr->fattr;
struct inode *inode = hdr->inode;
- if (fattr == NULL)
- return;
spin_lock(&inode->i_lock);
nfs_writeback_check_extend(hdr, fattr);
nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
@@ -1556,7 +1555,7 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
/* Set up the initial task struct. */
nfs_ops->commit_setup(data, &msg);
- dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
+ dprintk("NFS: initiated commit call\n");
nfs4_state_protect(NFS_SERVER(data->inode)->nfs_client,
NFS_SP4_MACH_CRED_COMMIT, &task_setup_data.rpc_client, &msg);
@@ -2013,7 +2012,6 @@ static const struct nfs_rw_ops nfs_rw_write_ops = {
.rw_mode = FMODE_WRITE,
.rw_alloc_header = nfs_writehdr_alloc,
.rw_free_header = nfs_writehdr_free,
- .rw_release = nfs_writeback_release_common,
.rw_done = nfs_writeback_done,
.rw_result = nfs_writeback_result,
.rw_initiate = nfs_initiate_write,