diff options
author | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2016-06-10 05:30:17 -0300 |
---|---|---|
committer | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2016-06-10 05:30:17 -0300 |
commit | d635711daa98be86d4c7fd01499c34f566b54ccb (patch) | |
tree | aa5cc3760a27c3d57146498cb82fa549547de06c /fs/nfsd | |
parent | c91265cd0efb83778f015b4d4b1129bd2cfd075e (diff) |
Linux-libre 4.6.2-gnu
Diffstat (limited to 'fs/nfsd')
-rw-r--r-- | fs/nfsd/Kconfig | 28 | ||||
-rw-r--r-- | fs/nfsd/Makefile | 4 | ||||
-rw-r--r-- | fs/nfsd/blocklayout.c | 298 | ||||
-rw-r--r-- | fs/nfsd/blocklayoutxdr.c | 77 | ||||
-rw-r--r-- | fs/nfsd/blocklayoutxdr.h | 14 | ||||
-rw-r--r-- | fs/nfsd/nfs3proc.c | 7 | ||||
-rw-r--r-- | fs/nfsd/nfs4layouts.c | 31 | ||||
-rw-r--r-- | fs/nfsd/nfs4proc.c | 8 | ||||
-rw-r--r-- | fs/nfsd/nfs4recover.c | 29 | ||||
-rw-r--r-- | fs/nfsd/nfs4state.c | 29 | ||||
-rw-r--r-- | fs/nfsd/nfs4xdr.c | 13 | ||||
-rw-r--r-- | fs/nfsd/pnfs.h | 8 | ||||
-rw-r--r-- | fs/nfsd/vfs.c | 4 | ||||
-rw-r--r-- | fs/nfsd/vfs.h | 19 |
14 files changed, 477 insertions, 92 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index a0b77fc1b..c9f583d7b 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -84,12 +84,30 @@ config NFSD_V4 If unsure, say N. config NFSD_PNFS - bool "NFSv4.1 server support for Parallel NFS (pNFS)" - depends on NFSD_V4 + bool + +config NFSD_BLOCKLAYOUT + bool "NFSv4.1 server support for pNFS block layouts" + depends on NFSD_V4 && BLOCK + select NFSD_PNFS + help + This option enables support for the exporting pNFS block layouts + in the kernel's NFS server. The pNFS block layout enables NFS + clients to directly perform I/O to block devices accessible to both + the server and the clients. See RFC 5663 for more details. + + If unsure, say N. + +config NFSD_SCSILAYOUT + bool "NFSv4.1 server support for pNFS SCSI layouts" + depends on NFSD_V4 && BLOCK + select NFSD_PNFS help - This option enables support for the parallel NFS features of the - minor version 1 of the NFSv4 protocol (RFC5661) in the kernel's NFS - server. + This option enables support for the exporting pNFS SCSI layouts + in the kernel's NFS server. The pNFS SCSI layout enables NFS + clients to directly perform I/O to SCSI devices accessible to both + the server and the clients. See draft-ietf-nfsv4-scsi-layout for + more details. If unsure, say N. 
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index 9a6028e12..3ae5f3c77 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -17,4 +17,6 @@ nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ nfs4acl.o nfs4callback.o nfs4recover.o -nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o blocklayout.o blocklayoutxdr.o +nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o +nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o +nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index c29d9421b..e55b52426 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -1,11 +1,14 @@ /* - * Copyright (c) 2014 Christoph Hellwig. + * Copyright (c) 2014-2016 Christoph Hellwig. */ #include <linux/exportfs.h> #include <linux/genhd.h> #include <linux/slab.h> +#include <linux/pr.h> #include <linux/nfsd/debug.h> +#include <scsi/scsi_proto.h> +#include <scsi/scsi_common.h> #include "blocklayoutxdr.h" #include "pnfs.h" @@ -13,37 +16,6 @@ #define NFSDDBG_FACILITY NFSDDBG_PNFS -static int -nfsd4_block_get_device_info_simple(struct super_block *sb, - struct nfsd4_getdeviceinfo *gdp) -{ - struct pnfs_block_deviceaddr *dev; - struct pnfs_block_volume *b; - - dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) + - sizeof(struct pnfs_block_volume), GFP_KERNEL); - if (!dev) - return -ENOMEM; - gdp->gd_device = dev; - - dev->nr_volumes = 1; - b = &dev->volumes[0]; - - b->type = PNFS_BLOCK_VOLUME_SIMPLE; - b->simple.sig_len = PNFS_BLOCK_UUID_LEN; - return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len, - &b->simple.offset); -} - -static __be32 -nfsd4_block_proc_getdeviceinfo(struct super_block *sb, - struct nfsd4_getdeviceinfo *gdp) -{ - if (sb->s_bdev != sb->s_bdev->bd_contains) - return nfserr_inval; - return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp)); -} - static __be32 
nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, struct nfsd4_layoutget *args) @@ -141,20 +113,13 @@ out_layoutunavailable: } static __be32 -nfsd4_block_proc_layoutcommit(struct inode *inode, - struct nfsd4_layoutcommit *lcp) +nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp, + struct iomap *iomaps, int nr_iomaps) { loff_t new_size = lcp->lc_last_wr + 1; struct iattr iattr = { .ia_valid = 0 }; - struct iomap *iomaps; - int nr_iomaps; int error; - nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout, - lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits); - if (nr_iomaps < 0) - return nfserrno(nr_iomaps); - if (lcp->lc_mtime.tv_nsec == UTIME_NOW || timespec_compare(&lcp->lc_mtime, &inode->i_mtime) < 0) lcp->lc_mtime = current_fs_time(inode->i_sb); @@ -172,6 +137,54 @@ nfsd4_block_proc_layoutcommit(struct inode *inode, return nfserrno(error); } +#ifdef CONFIG_NFSD_BLOCKLAYOUT +static int +nfsd4_block_get_device_info_simple(struct super_block *sb, + struct nfsd4_getdeviceinfo *gdp) +{ + struct pnfs_block_deviceaddr *dev; + struct pnfs_block_volume *b; + + dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) + + sizeof(struct pnfs_block_volume), GFP_KERNEL); + if (!dev) + return -ENOMEM; + gdp->gd_device = dev; + + dev->nr_volumes = 1; + b = &dev->volumes[0]; + + b->type = PNFS_BLOCK_VOLUME_SIMPLE; + b->simple.sig_len = PNFS_BLOCK_UUID_LEN; + return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len, + &b->simple.offset); +} + +static __be32 +nfsd4_block_proc_getdeviceinfo(struct super_block *sb, + struct nfs4_client *clp, + struct nfsd4_getdeviceinfo *gdp) +{ + if (sb->s_bdev != sb->s_bdev->bd_contains) + return nfserr_inval; + return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp)); +} + +static __be32 +nfsd4_block_proc_layoutcommit(struct inode *inode, + struct nfsd4_layoutcommit *lcp) +{ + struct iomap *iomaps; + int nr_iomaps; + + nr_iomaps = 
nfsd4_block_decode_layoutupdate(lcp->lc_up_layout, + lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits); + if (nr_iomaps < 0) + return nfserrno(nr_iomaps); + + return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps); +} + const struct nfsd4_layout_ops bl_layout_ops = { /* * Pretend that we send notification to the client. This is a blatant @@ -190,3 +203,206 @@ const struct nfsd4_layout_ops bl_layout_ops = { .encode_layoutget = nfsd4_block_encode_layoutget, .proc_layoutcommit = nfsd4_block_proc_layoutcommit, }; +#endif /* CONFIG_NFSD_BLOCKLAYOUT */ + +#ifdef CONFIG_NFSD_SCSILAYOUT +static int nfsd4_scsi_identify_device(struct block_device *bdev, + struct pnfs_block_volume *b) +{ + struct request_queue *q = bdev->bd_disk->queue; + struct request *rq; + size_t bufflen = 252, len, id_len; + u8 *buf, *d, type, assoc; + int error; + + buf = kzalloc(bufflen, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + rq = blk_get_request(q, READ, GFP_KERNEL); + if (IS_ERR(rq)) { + error = -ENOMEM; + goto out_free_buf; + } + blk_rq_set_block_pc(rq); + + error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL); + if (error) + goto out_put_request; + + rq->cmd[0] = INQUIRY; + rq->cmd[1] = 1; + rq->cmd[2] = 0x83; + rq->cmd[3] = bufflen >> 8; + rq->cmd[4] = bufflen & 0xff; + rq->cmd_len = COMMAND_SIZE(INQUIRY); + + error = blk_execute_rq(rq->q, NULL, rq, 1); + if (error) { + pr_err("pNFS: INQUIRY 0x83 failed with: %x\n", + rq->errors); + goto out_put_request; + } + + len = (buf[2] << 8) + buf[3] + 4; + if (len > bufflen) { + pr_err("pNFS: INQUIRY 0x83 response invalid (len = %zd)\n", + len); + goto out_put_request; + } + + d = buf + 4; + for (d = buf + 4; d < buf + len; d += id_len + 4) { + id_len = d[3]; + type = d[1] & 0xf; + assoc = (d[1] >> 4) & 0x3; + + /* + * We only care about a EUI-64 and NAA designator types + * with LU association. 
+ */ + if (assoc != 0x00) + continue; + if (type != 0x02 && type != 0x03) + continue; + if (id_len != 8 && id_len != 12 && id_len != 16) + continue; + + b->scsi.code_set = PS_CODE_SET_BINARY; + b->scsi.designator_type = type == 0x02 ? + PS_DESIGNATOR_EUI64 : PS_DESIGNATOR_NAA; + b->scsi.designator_len = id_len; + memcpy(b->scsi.designator, d + 4, id_len); + + /* + * If we found a 8 or 12 byte descriptor continue on to + * see if a 16 byte one is available. If we find a + * 16 byte descriptor we're done. + */ + if (id_len == 16) + break; + } + +out_put_request: + blk_put_request(rq); +out_free_buf: + kfree(buf); + return error; +} + +#define NFSD_MDS_PR_KEY 0x0100000000000000 + +/* + * We use the client ID as a unique key for the reservations. + * This allows us to easily fence a client when recalls fail. + */ +static u64 nfsd4_scsi_pr_key(struct nfs4_client *clp) +{ + return ((u64)clp->cl_clientid.cl_boot << 32) | clp->cl_clientid.cl_id; +} + +static int +nfsd4_block_get_device_info_scsi(struct super_block *sb, + struct nfs4_client *clp, + struct nfsd4_getdeviceinfo *gdp) +{ + struct pnfs_block_deviceaddr *dev; + struct pnfs_block_volume *b; + const struct pr_ops *ops; + int error; + + dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) + + sizeof(struct pnfs_block_volume), GFP_KERNEL); + if (!dev) + return -ENOMEM; + gdp->gd_device = dev; + + dev->nr_volumes = 1; + b = &dev->volumes[0]; + + b->type = PNFS_BLOCK_VOLUME_SCSI; + b->scsi.pr_key = nfsd4_scsi_pr_key(clp); + + error = nfsd4_scsi_identify_device(sb->s_bdev, b); + if (error) + return error; + + ops = sb->s_bdev->bd_disk->fops->pr_ops; + if (!ops) { + pr_err("pNFS: device %s does not support PRs.\n", + sb->s_id); + return -EINVAL; + } + + error = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true); + if (error) { + pr_err("pNFS: failed to register key for device %s.\n", + sb->s_id); + return -EINVAL; + } + + error = ops->pr_reserve(sb->s_bdev, NFSD_MDS_PR_KEY, + PR_EXCLUSIVE_ACCESS_REG_ONLY, 0); + if 
(error) { + pr_err("pNFS: failed to reserve device %s.\n", + sb->s_id); + return -EINVAL; + } + + return 0; +} + +static __be32 +nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb, + struct nfs4_client *clp, + struct nfsd4_getdeviceinfo *gdp) +{ + if (sb->s_bdev != sb->s_bdev->bd_contains) + return nfserr_inval; + return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp)); +} +static __be32 +nfsd4_scsi_proc_layoutcommit(struct inode *inode, + struct nfsd4_layoutcommit *lcp) +{ + struct iomap *iomaps; + int nr_iomaps; + + nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout, + lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits); + if (nr_iomaps < 0) + return nfserrno(nr_iomaps); + + return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps); +} + +static void +nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls) +{ + struct nfs4_client *clp = ls->ls_stid.sc_client; + struct block_device *bdev = ls->ls_file->f_path.mnt->mnt_sb->s_bdev; + + bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY, + nfsd4_scsi_pr_key(clp), 0, true); +} + +const struct nfsd4_layout_ops scsi_layout_ops = { + /* + * Pretend that we send notification to the client. This is a blatant + * lie to force recent Linux clients to cache our device IDs. + * We rarely ever change the device ID, so the harm of leaking deviceids + * for a while isn't too bad. Unfortunately RFC5661 is a complete mess + * in this regard, but I filed errata 4119 for this a while ago, and + * hopefully the Linux client will eventually start caching deviceids + * without this again. 
+ */ + .notify_types = + NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE, + .proc_getdeviceinfo = nfsd4_scsi_proc_getdeviceinfo, + .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo, + .proc_layoutget = nfsd4_block_proc_layoutget, + .encode_layoutget = nfsd4_block_encode_layoutget, + .proc_layoutcommit = nfsd4_scsi_proc_layoutcommit, + .fence_client = nfsd4_scsi_fence_client, +}; +#endif /* CONFIG_NFSD_SCSILAYOUT */ diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c index 6d834dc9b..6c3b316f9 100644 --- a/fs/nfsd/blocklayoutxdr.c +++ b/fs/nfsd/blocklayoutxdr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Christoph Hellwig. + * Copyright (c) 2014-2016 Christoph Hellwig. */ #include <linux/sunrpc/svc.h> #include <linux/exportfs.h> @@ -53,6 +53,18 @@ nfsd4_block_encode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b) p = xdr_encode_hyper(p, b->simple.offset); p = xdr_encode_opaque(p, b->simple.sig, b->simple.sig_len); break; + case PNFS_BLOCK_VOLUME_SCSI: + len = 4 + 4 + 4 + 4 + b->scsi.designator_len + 8; + p = xdr_reserve_space(xdr, len); + if (!p) + return -ETOOSMALL; + + *p++ = cpu_to_be32(b->type); + *p++ = cpu_to_be32(b->scsi.code_set); + *p++ = cpu_to_be32(b->scsi.designator_type); + p = xdr_encode_opaque(p, b->scsi.designator, b->scsi.designator_len); + p = xdr_encode_hyper(p, b->scsi.pr_key); + break; default: return -ENOTSUPP; } @@ -93,18 +105,22 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, u32 block_size) { struct iomap *iomaps; - u32 nr_iomaps, expected, i; + u32 nr_iomaps, i; if (len < sizeof(u32)) { dprintk("%s: extent array too small: %u\n", __func__, len); return -EINVAL; } + len -= sizeof(u32); + if (len % PNFS_BLOCK_EXTENT_SIZE) { + dprintk("%s: extent array invalid: %u\n", __func__, len); + return -EINVAL; + } nr_iomaps = be32_to_cpup(p++); - expected = sizeof(__be32) + nr_iomaps * PNFS_BLOCK_EXTENT_SIZE; - if (len != expected) { + if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) { 
dprintk("%s: extent array size mismatch: %u/%u\n", - __func__, len, expected); + __func__, len, nr_iomaps); return -EINVAL; } @@ -155,3 +171,54 @@ fail: kfree(iomaps); return -EINVAL; } + +int +nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, + u32 block_size) +{ + struct iomap *iomaps; + u32 nr_iomaps, expected, i; + + if (len < sizeof(u32)) { + dprintk("%s: extent array too small: %u\n", __func__, len); + return -EINVAL; + } + + nr_iomaps = be32_to_cpup(p++); + expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE; + if (len != expected) { + dprintk("%s: extent array size mismatch: %u/%u\n", + __func__, len, expected); + return -EINVAL; + } + + iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL); + if (!iomaps) { + dprintk("%s: failed to allocate extent array\n", __func__); + return -ENOMEM; + } + + for (i = 0; i < nr_iomaps; i++) { + u64 val; + + p = xdr_decode_hyper(p, &val); + if (val & (block_size - 1)) { + dprintk("%s: unaligned offset 0x%llx\n", __func__, val); + goto fail; + } + iomaps[i].offset = val; + + p = xdr_decode_hyper(p, &val); + if (val & (block_size - 1)) { + dprintk("%s: unaligned length 0x%llx\n", __func__, val); + goto fail; + } + iomaps[i].length = val; + } + + *iomapp = iomaps; + return nr_iomaps; +fail: + kfree(iomaps); + return -EINVAL; +} diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h index 6de925fe8..397bc7563 100644 --- a/fs/nfsd/blocklayoutxdr.h +++ b/fs/nfsd/blocklayoutxdr.h @@ -15,6 +15,11 @@ struct pnfs_block_extent { enum pnfs_block_extent_state es; }; +struct pnfs_block_range { + u64 foff; + u64 len; +}; + /* * Random upper cap for the uuid length to avoid unbounded allocation. * Not actually limited by the protocol. 
@@ -29,6 +34,13 @@ struct pnfs_block_volume { u32 sig_len; u8 sig[PNFS_BLOCK_UUID_LEN]; } simple; + struct { + enum scsi_code_set code_set; + enum scsi_designator_type designator_type; + int designator_len; + u8 designator[256]; + u64 pr_key; + } scsi; }; }; @@ -43,5 +55,7 @@ __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr, struct nfsd4_layoutget *lgp); int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, u32 block_size); +int nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, + u32 block_size); #endif /* _NFSD_BLOCKLAYOUTXDR_H */ diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 7b755b7f7..51c3b06e8 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -147,6 +147,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, { __be32 nfserr; u32 max_blocksize = svc_max_payload(rqstp); + unsigned long cnt = min(argp->count, max_blocksize); dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n", SVCFH_fmt(&argp->fh), @@ -157,7 +158,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) * + 1 (xdr opaque byte count) = 26 */ - resp->count = min(argp->count, max_blocksize); + resp->count = cnt; svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); fh_copy(&resp->fh, &argp->fh); @@ -167,8 +168,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, &resp->count); if (nfserr == 0) { struct inode *inode = d_inode(resp->fh.fh_dentry); - - resp->eof = (argp->offset + resp->count) >= inode->i_size; + resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset, + inode->i_size); } RETURN_STATUS(nfserr); diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index ce2d010d3..825c7bc8d 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2014 Christoph Hellwig. 
*/ +#include <linux/blkdev.h> #include <linux/kmod.h> #include <linux/file.h> #include <linux/jhash.h> @@ -26,7 +27,12 @@ static const struct nfsd4_callback_ops nfsd4_cb_layout_ops; static const struct lock_manager_operations nfsd4_layouts_lm_ops; const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = { +#ifdef CONFIG_NFSD_BLOCKLAYOUT [LAYOUT_BLOCK_VOLUME] = &bl_layout_ops, +#endif +#ifdef CONFIG_NFSD_SCSILAYOUT + [LAYOUT_SCSI] = &scsi_layout_ops, +#endif }; /* pNFS device ID to export fsid mapping */ @@ -121,10 +127,24 @@ void nfsd4_setup_layout_type(struct svc_export *exp) if (!(exp->ex_flags & NFSEXP_PNFS)) return; + /* + * Check if the file system supports exporting a block-like layout. + * If the block device supports reservations prefer the SCSI layout, + * otherwise advertise the block layout. + */ +#ifdef CONFIG_NFSD_BLOCKLAYOUT if (sb->s_export_op->get_uuid && sb->s_export_op->map_blocks && sb->s_export_op->commit_blocks) exp->ex_layout_type = LAYOUT_BLOCK_VOLUME; +#endif +#ifdef CONFIG_NFSD_SCSILAYOUT + /* overwrite block layout selection if needed */ + if (sb->s_export_op->map_blocks && + sb->s_export_op->commit_blocks && + sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops) + exp->ex_layout_type = LAYOUT_SCSI; +#endif } static void @@ -590,8 +610,6 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls) rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str)); - trace_layout_recall_fail(&ls->ls_stid.sc_stateid); - printk(KERN_WARNING "nfsd: client %s failed to respond to layout recall. " " Fencing..\n", addr_str); @@ -626,6 +644,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) container_of(cb, struct nfs4_layout_stateid, ls_recall); struct nfsd_net *nn; ktime_t now, cutoff; + const struct nfsd4_layout_ops *ops; LIST_HEAD(reaplist); @@ -661,7 +680,13 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) /* * Unknown error or non-responding client, we'll need to fence. 
*/ - nfsd4_cb_layout_fail(ls); + trace_layout_recall_fail(&ls->ls_stid.sc_stateid); + + ops = nfsd4_layout_ops[ls->ls_layout_type]; + if (ops->fence_client) + ops->fence_client(ls); + else + nfsd4_cb_layout_fail(ls); return -1; } } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index f8082c7cd..de1ff1d98 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -864,12 +864,10 @@ static __be32 nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_secinfo *secinfo) { - struct svc_fh resfh; struct svc_export *exp; struct dentry *dentry; __be32 err; - fh_init(&resfh, NFS4_FHSIZE); err = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_EXEC); if (err) return err; @@ -1270,8 +1268,10 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp, goto out; nfserr = nfs_ok; - if (gdp->gd_maxcount != 0) - nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp); + if (gdp->gd_maxcount != 0) { + nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, + cstate->session->se_client, gdp); + } gdp->gd_notify_types &= ops->notify_types; out: diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index dc8ebecf5..66eaeb1e8 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -32,10 +32,10 @@ * */ +#include <crypto/hash.h> #include <linux/file.h> #include <linux/slab.h> #include <linux/namei.h> -#include <linux/crypto.h> #include <linux/sched.h> #include <linux/fs.h> #include <linux/module.h> @@ -104,29 +104,35 @@ static int nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname) { struct xdr_netobj cksum; - struct hash_desc desc; - struct scatterlist sg; + struct crypto_shash *tfm; int status; dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", clname->len, clname->data); - desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; - desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(desc.tfm)) { - status = PTR_ERR(desc.tfm); + tfm = crypto_alloc_shash("md5", 0, 0); + if (IS_ERR(tfm)) { + status = 
PTR_ERR(tfm); goto out_no_tfm; } - cksum.len = crypto_hash_digestsize(desc.tfm); + cksum.len = crypto_shash_digestsize(tfm); cksum.data = kmalloc(cksum.len, GFP_KERNEL); if (cksum.data == NULL) { status = -ENOMEM; goto out; } - sg_init_one(&sg, clname->data, clname->len); + { + SHASH_DESC_ON_STACK(desc, tfm); + + desc->tfm = tfm; + desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + + status = crypto_shash_digest(desc, clname->data, clname->len, + cksum.data); + shash_desc_zero(desc); + } - status = crypto_hash_digest(&desc, &sg, sg.length, cksum.data); if (status) goto out; @@ -135,7 +141,7 @@ nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname) status = 0; out: kfree(cksum.data); - crypto_free_hash(desc.tfm); + crypto_free_shash(tfm); out_no_tfm: return status; } @@ -1260,6 +1266,7 @@ nfsd4_umh_cltrack_init(struct net *net) /* XXX: The usermode helper s not working in container yet. */ if (net != &init_net) { pr_warn("NFSD: attempt to initialize umh client tracking in a container ignored.\n"); + kfree(grace_start); return -EINVAL; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c484a2b6c..0462eeddf 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2408,7 +2408,8 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, default: /* checked by xdr code */ WARN_ON_ONCE(1); case SP4_SSV: - return nfserr_encr_alg_unsupp; + status = nfserr_encr_alg_unsupp; + goto out_nolock; } /* Cases below refer to rfc 5661 section 18.35.4: */ @@ -2586,21 +2587,26 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs return nfs_ok; } +/* + * Server's NFSv4.1 backchannel support is AUTH_SYS-only for now. + * These are based on similar macros in linux/sunrpc/msg_prot.h . 
+ */ +#define RPC_MAX_HEADER_WITH_AUTH_SYS \ + (RPC_CALLHDRSIZE + 2 * (2 + UNX_CALLSLACK)) + +#define RPC_MAX_REPHEADER_WITH_AUTH_SYS \ + (RPC_REPHDRSIZE + (2 + NUL_REPLYSLACK)) + #define NFSD_CB_MAX_REQ_SZ ((NFS4_enc_cb_recall_sz + \ - RPC_MAX_HEADER_WITH_AUTH) * sizeof(__be32)) + RPC_MAX_HEADER_WITH_AUTH_SYS) * sizeof(__be32)) #define NFSD_CB_MAX_RESP_SZ ((NFS4_dec_cb_recall_sz + \ - RPC_MAX_REPHEADER_WITH_AUTH) * sizeof(__be32)) + RPC_MAX_REPHEADER_WITH_AUTH_SYS) * \ + sizeof(__be32)) static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca) { ca->headerpadsz = 0; - /* - * These RPC_MAX_HEADER macros are overkill, especially since we - * don't even do gss on the backchannel yet. But this is still - * less than 1k. Tighten up this estimate in the unlikely event - * it turns out to be a problem for some client: - */ if (ca->maxreq_sz < NFSD_CB_MAX_REQ_SZ) return nfserr_toosmall; if (ca->maxresp_sz < NFSD_CB_MAX_RESP_SZ) @@ -2710,10 +2716,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out_free_conn; } status = nfs_ok; - /* - * We do not support RDMA or persistent sessions - */ + /* Persistent sessions are not supported */ cr_ses->flags &= ~SESSION4_PERSIST; + /* Upshifting from TCP to RDMA is not supported */ cr_ses->flags &= ~SESSION4_RDMA; init_session(rqstp, new, conf, cr_ses); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1600ec470..9df898ba6 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3063,7 +3063,7 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, p = xdr_encode_opaque_fixed(p, bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); *p++ = cpu_to_be32(bcts->dir); - /* Sorry, we do not yet support RDMA over 4.1: */ + /* Upshifting from TCP to RDMA is not supported */ *p++ = cpu_to_be32(0); } return nfserr; @@ -3365,6 +3365,7 @@ static __be32 nfsd4_encode_splice_read( struct xdr_stream *xdr = &resp->xdr; struct xdr_buf *buf = xdr->buf; u32 eof; + long len; int space_left; __be32 nfserr; 
__be32 *p = xdr->p - 2; @@ -3373,6 +3374,7 @@ static __be32 nfsd4_encode_splice_read( if (xdr->end - xdr->p < 1) return nfserr_resource; + len = maxcount; nfserr = nfsd_splice_read(read->rd_rqstp, file, read->rd_offset, &maxcount); if (nfserr) { @@ -3385,8 +3387,8 @@ static __be32 nfsd4_encode_splice_read( return nfserr; } - eof = (read->rd_offset + maxcount >= - d_inode(read->rd_fhp->fh_dentry)->i_size); + eof = nfsd_eof_on_read(len, maxcount, read->rd_offset, + d_inode(read->rd_fhp->fh_dentry)->i_size); *(p++) = htonl(eof); *(p++) = htonl(maxcount); @@ -3456,14 +3458,15 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, } read->rd_vlen = v; + len = maxcount; nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, &maxcount); if (nfserr) return nfserr; xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3)); - eof = (read->rd_offset + maxcount >= - d_inode(read->rd_fhp->fh_dentry)->i_size); + eof = nfsd_eof_on_read(len, maxcount, read->rd_offset, + d_inode(read->rd_fhp->fh_dentry)->i_size); tmp = htonl(eof); write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4); diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h index d4c445367..7d073b9b1 100644 --- a/fs/nfsd/pnfs.h +++ b/fs/nfsd/pnfs.h @@ -21,6 +21,7 @@ struct nfsd4_layout_ops { u32 notify_types; __be32 (*proc_getdeviceinfo)(struct super_block *sb, + struct nfs4_client *clp, struct nfsd4_getdeviceinfo *gdevp); __be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr, struct nfsd4_getdeviceinfo *gdevp); @@ -32,10 +33,17 @@ struct nfsd4_layout_ops { __be32 (*proc_layoutcommit)(struct inode *inode, struct nfsd4_layoutcommit *lcp); + + void (*fence_client)(struct nfs4_layout_stateid *ls); }; extern const struct nfsd4_layout_ops *nfsd4_layout_ops[]; +#ifdef CONFIG_NFSD_BLOCKLAYOUT extern const struct nfsd4_layout_ops bl_layout_ops; +#endif +#ifdef CONFIG_NFSD_SCSILAYOUT +extern const struct nfsd4_layout_ops scsi_layout_ops; +#endif __be32 nfsd4_preprocess_layout_stateid(struct 
svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stateid_t *stateid, diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 5d2a57e4c..d40010e4f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -870,7 +870,7 @@ __be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen, oldfs = get_fs(); set_fs(KERNEL_DS); - host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); + host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset, 0); set_fs(oldfs); return nfsd_finish_read(file, count, host_err); } @@ -957,7 +957,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, /* Write the data. */ oldfs = get_fs(); set_fs(KERNEL_DS); - host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos); + host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos, 0); set_fs(oldfs); if (host_err < 0) goto out_nfserr; diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index c11ba316f..2d573ec05 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -139,4 +139,23 @@ static inline int nfsd_create_is_exclusive(int createmode) || createmode == NFS4_CREATE_EXCLUSIVE4_1; } +static inline bool nfsd_eof_on_read(long requested, long read, + loff_t offset, loff_t size) +{ + /* We assume a short read means eof: */ + if (requested > read) + return true; + /* + * A non-short read might also reach end of file. The spec + * still requires us to set eof in that case. + * + * Further operations may have modified the file size since + * the read, so the following check is not atomic with the read. + * We've only seen that cause a problem for a client in the case + * where the read returned a count of 0 without setting eof. + * That case was fixed by the addition of the above check. + */ + return (offset + read >= size); +} + #endif /* LINUX_NFSD_VFS_H */ |