Diffstat (limited to 'fs')
46 files changed, 489 insertions, 148 deletions
diff --git a/fs/aufs/cpup.c b/fs/aufs/cpup.c index a34648874..d34a6d283 100644 --- a/fs/aufs/cpup.c +++ b/fs/aufs/cpup.c @@ -512,6 +512,55 @@ out: return err; } +/* + * regardless 'acl' option, reset all ACL. + * All ACL will be copied up later from the original entry on the lower branch. + */ +static int au_reset_acl(struct inode *h_dir, struct path *h_path, umode_t mode) +{ + int err; + struct dentry *h_dentry; + struct inode *h_inode; + + h_dentry = h_path->dentry; + h_inode = d_inode(h_dentry); + /* forget_all_cached_acls(h_inode)); */ + err = vfsub_removexattr(h_dentry, XATTR_NAME_POSIX_ACL_ACCESS); + AuTraceErr(err); + if (!err) + err = vfsub_acl_chmod(h_inode, mode); + + AuTraceErr(err); + return err; +} + +static int au_do_cpup_dir(struct au_cp_generic *cpg, struct dentry *dst_parent, + struct inode *h_dir, struct path *h_path) +{ + int err; + struct inode *dir, *inode; + + err = vfsub_removexattr(h_path->dentry, XATTR_NAME_POSIX_ACL_DEFAULT); + AuTraceErr(err); + if (err == -EOPNOTSUPP) + err = 0; + if (unlikely(err)) + goto out; + + /* + * strange behaviour from the users view, + * particularry setattr case + */ + dir = d_inode(dst_parent); + if (au_ibstart(dir) == cpg->bdst) + au_cpup_attr_nlink(dir, /*force*/1); + inode = d_inode(cpg->dentry); + au_cpup_attr_nlink(inode, /*force*/1); + +out: + return err; +} + static noinline_for_stack int cpup_entry(struct au_cp_generic *cpg, struct dentry *dst_parent, struct au_cpup_reg_attr *h_src_attr) @@ -524,7 +573,7 @@ int cpup_entry(struct au_cp_generic *cpg, struct dentry *dst_parent, struct au_dtime dt; struct path h_path; struct dentry *h_src, *h_dst, *h_parent; - struct inode *h_inode, *h_dir, *dir, *inode; + struct inode *h_inode, *h_dir; struct super_block *sb; /* bsrc branch can be ro/rw. 
*/ @@ -564,17 +613,8 @@ int cpup_entry(struct au_cp_generic *cpg, struct dentry *dst_parent, case S_IFDIR: isdir = 1; err = vfsub_mkdir(h_dir, &h_path, mode); - if (!err) { - /* - * strange behaviour from the users view, - * particularry setattr case - */ - dir = d_inode(dst_parent); - if (au_ibstart(dir) == cpg->bdst) - au_cpup_attr_nlink(dir, /*force*/1); - inode = d_inode(cpg->dentry); - au_cpup_attr_nlink(inode, /*force*/1); - } + if (!err) + err = au_do_cpup_dir(cpg, dst_parent, h_dir, &h_path); break; case S_IFLNK: err = au_do_cpup_symlink(&h_path, h_src, h_dir); @@ -591,6 +631,8 @@ int cpup_entry(struct au_cp_generic *cpg, struct dentry *dst_parent, AuIOErr("Unknown inode type 0%o\n", mode); err = -EIO; } + if (!err) + err = au_reset_acl(h_dir, &h_path, mode); mnt_flags = au_mntflags(sb); if (!au_opt_test(mnt_flags, UDBA_NONE) diff --git a/fs/aufs/dir.c b/fs/aufs/dir.c index 8a619062a..a994e0862 100644 --- a/fs/aufs/dir.c +++ b/fs/aufs/dir.c @@ -237,11 +237,13 @@ static int do_open_dir(struct file *file, int flags, struct file *h_file) int err; aufs_bindex_t bindex, btail; struct dentry *dentry, *h_dentry; + struct vfsmount *mnt; FiMustWriteLock(file); AuDebugOn(h_file); err = 0; + mnt = file->f_path.mnt; dentry = file->f_path.dentry; file->f_version = d_inode(dentry)->i_version; bindex = au_dbstart(dentry); @@ -253,6 +255,9 @@ static int do_open_dir(struct file *file, int flags, struct file *h_file) if (!h_dentry) continue; + err = vfsub_test_mntns(mnt, h_dentry->d_sb); + if (unlikely(err)) + break; h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0); if (IS_ERR(h_file)) { err = PTR_ERR(h_file); diff --git a/fs/aufs/export.c b/fs/aufs/export.c index 4ce7732b7..7f6fec61f 100644 --- a/fs/aufs/export.c +++ b/fs/aufs/export.c @@ -597,7 +597,7 @@ aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, ino = decode_ino(fh + Fh_ino); /* it should never happen */ if (unlikely(ino == AUFS_ROOT_INO)) - goto out; + goto out_unlock; dir_ino = decode_ino(fh + Fh_dir_ino); dentry = decode_by_ino(sb, ino, dir_ino); diff --git a/fs/aufs/f_op.c b/fs/aufs/f_op.c index c1be75cb0..2baacd7a6 100644 --- a/fs/aufs/f_op.c +++ b/fs/aufs/f_op.c @@ -16,7 +16,7 @@ int au_do_open_nondir(struct file *file, int flags, struct file *h_file) { int err; aufs_bindex_t bindex; - struct dentry *dentry; + struct dentry *dentry, *h_dentry; struct au_finfo *finfo; struct inode *h_inode; @@ -29,10 +29,19 @@ int au_do_open_nondir(struct file *file, int flags, struct file *h_file) memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop)); atomic_set(&finfo->fi_mmapped, 0); bindex = au_dbstart(dentry); - if (!h_file) + if (!h_file) { + h_dentry = au_h_dptr(dentry, bindex); + err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb); + if (unlikely(err)) + goto out; h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0); - else + } else { + h_dentry = h_file->f_path.dentry; + err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb); + if (unlikely(err)) + goto out; get_file(h_file); + } if (IS_ERR(h_file)) err = PTR_ERR(h_file); else { @@ -50,6 +59,7 @@ int au_do_open_nondir(struct file *file, int flags, struct file *h_file) /* file->f_ra = h_file->f_ra; */ } +out: return err; } diff --git a/fs/aufs/fstype.h b/fs/aufs/fstype.h index e429a8b17..725b2ffff 100644 --- a/fs/aufs/fstype.h +++ b/fs/aufs/fstype.h @@ -240,7 +240,6 @@ static inline int au_test_hfsplus(struct super_block *sb __maybe_unused) static inline int au_test_fs_unsuppoted(struct super_block *sb) { return - au_test_fuse(sb) || /* for a 
security reason, temporarily */ #ifndef CONFIG_AUFS_BR_RAMFS au_test_ramfs(sb) || #endif diff --git a/fs/aufs/i_op.c b/fs/aufs/i_op.c index 1c634bab9..6e50526d8 100644 --- a/fs/aufs/i_op.c +++ b/fs/aufs/i_op.c @@ -963,6 +963,12 @@ static int aufs_setattr(struct dentry *dentry, struct iattr *ia) break; } } + /* + * regardless aufs 'acl' option setting. + * why don't all acl-aware fs call this func from their ->setattr()? + */ + if (!err && (ia->ia_valid & ATTR_MODE)) + err = vfsub_acl_chmod(a->h_inode, ia->ia_mode); if (!err) au_cpup_attr_changeable(inode); diff --git a/fs/aufs/module.c b/fs/aufs/module.c index ec12f2e66..8a28377c5 100644 --- a/fs/aufs/module.c +++ b/fs/aufs/module.c @@ -95,7 +95,7 @@ MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/si_*/brN"); module_param_named(brs, sysaufs_brs, int, S_IRUGO); /* this module parameter has no meaning when USER_NS is disabled */ -static bool au_userns; +bool au_userns; MODULE_PARM_DESC(allow_userns, "allow unprivileged to mount under userns"); module_param_named(allow_userns, au_userns, bool, S_IRUGO); diff --git a/fs/aufs/module.h b/fs/aufs/module.h index b129ad4b6..bb8644730 100644 --- a/fs/aufs/module.h +++ b/fs/aufs/module.h @@ -18,6 +18,7 @@ struct seq_file; /* module parameters */ extern int sysaufs_brs; +extern bool au_userns; /* ---------------------------------------------------------------------- */ diff --git a/fs/aufs/posix_acl.c b/fs/aufs/posix_acl.c index 1c19e629b..a3c442c08 100644 --- a/fs/aufs/posix_acl.c +++ b/fs/aufs/posix_acl.c @@ -7,7 +7,6 @@ */ #include <linux/fs.h> -#include <linux/posix_acl.h> #include "aufs.h" struct posix_acl *aufs_get_acl(struct inode *inode, int type) diff --git a/fs/aufs/vfsub.c b/fs/aufs/vfsub.c index f072c59c0..89f999c97 100644 --- a/fs/aufs/vfsub.c +++ b/fs/aufs/vfsub.c @@ -7,10 +7,28 @@ */ #include <linux/namei.h> +#include <linux/nsproxy.h> #include <linux/security.h> #include <linux/splice.h> +#include "../fs/mount.h" #include "aufs.h" +#ifdef CONFIG_AUFS_BR_FUSE +int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb) +{ + struct nsproxy *ns; + + if (!au_test_fuse(h_sb) || !au_userns) + return 0; + + ns = current->nsproxy; + /* no {get,put}_nsproxy(ns) */ + return real_mount(mnt)->mnt_ns == ns->mnt_ns ? 
0 : -EACCES; +} +#endif + +/* ---------------------------------------------------------------------- */ + int vfsub_update_h_iattr(struct path *h_path, int *did) { int err; diff --git a/fs/aufs/vfsub.h b/fs/aufs/vfsub.h index a7d8a1cf9..f2e1c49af 100644 --- a/fs/aufs/vfsub.h +++ b/fs/aufs/vfsub.h @@ -13,6 +13,7 @@ #include <linux/fs.h> #include <linux/mount.h> +#include <linux/posix_acl.h> #include <linux/xattr.h> #include "debug.h" @@ -63,6 +64,12 @@ static inline int vfsub_native_ro(struct inode *inode) || IS_IMMUTABLE(inode); } +#ifdef CONFIG_AUFS_BR_FUSE +int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb); +#else +AuStubInt0(vfsub_test_mntns, struct vfsmount *mnt, struct super_block *h_sb); +#endif + /* ---------------------------------------------------------------------- */ int vfsub_update_h_iattr(struct path *h_path, int *did); @@ -202,6 +209,20 @@ static inline int vfsub_update_time(struct inode *h_inode, struct timespec *ts, /* no vfsub_update_h_iattr() since we don't have struct path */ } +#ifdef CONFIG_FS_POSIX_ACL +static inline int vfsub_acl_chmod(struct inode *h_inode, umode_t h_mode) +{ + int err; + + err = posix_acl_chmod(h_inode, h_mode); + if (err == -EOPNOTSUPP) + err = 0; + return err; +} +#else +AuStubInt0(vfsub_acl_chmod, struct inode *h_inode, umode_t h_mode); +#endif + long vfsub_splice_to(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index d453d62ab..e2f659dc5 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1417,7 +1417,8 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, read_extent_buffer(eb, dest + bytes_left, name_off, name_len); if (eb != eb_in) { - btrfs_tree_read_unlock_blocking(eb); + if (!path->skip_locking) + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); } ret = btrfs_find_item(fs_root, path, parent, 0, @@ -1437,9 +1438,10 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, eb = path->nodes[0]; /* make sure we can use eb after releasing the path */ if (eb != eb_in) { - atomic_inc(&eb->refs); - btrfs_tree_read_lock(eb); - btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); + if (!path->skip_locking) + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); + path->nodes[0] = NULL; + path->locks[0] = 0; } btrfs_release_path(path); iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index e0941fbb9..02b934d0e 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1694,7 +1694,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list, * */ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, - struct list_head *ins_list) + struct list_head *ins_list, bool *emitted) { struct btrfs_dir_item *di; struct btrfs_delayed_item *curr, *next; @@ -1738,6 +1738,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, if (over) return 1; + *emitted = true; } return 0; } diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index f70119f25..0167853c8 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -144,7 +144,7 @@ void btrfs_put_delayed_items(struct list_head *ins_list, int btrfs_should_delete_dir_index(struct list_head *del_list, u64 index); int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, - struct list_head *ins_list); + struct list_head *ins_list, bool *emitted); /* for init */ int __init btrfs_delayed_inode_init(void); 
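The new "emitted" argument added above is consumed by btrfs_real_readdir() in the fs/btrfs/inode.c hunk that follows: ctx->pos is only bumped to its termination value when at least one entry was actually handed to the VFS (the ctx->pos > 2 check assumes "." and ".." were already emitted). A minimal sketch of that pattern, assuming a hypothetical example_dirent table and EXAMPLE_EOF_POS marker; dir_emit() and struct dir_context are the real VFS helpers the patch relies on:

/* sketch only: example_dirent and EXAMPLE_EOF_POS are illustrative */
static int example_readdir(struct file *file, struct dir_context *ctx)
{
	const struct example_dirent *tbl = file->private_data;
	bool emitted = false;

	while (ctx->pos < EXAMPLE_EOF_POS) {
		const struct example_dirent *e = &tbl[ctx->pos];

		if (!dir_emit(ctx, e->name, e->len, e->ino, DT_REG))
			return 0;	/* caller's buffer is full, resume later */
		ctx->pos++;
		emitted = true;
	}

	/*
	 * If nothing was emitted, ctx->pos may already hold the termination
	 * value from a previous call; leaving it untouched keeps a repeated
	 * getdents() on an exhausted directory from replaying entries.
	 */
	if (emitted)
		ctx->pos = EXAMPLE_EOF_POS + 1;
	return 0;
}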
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a70c5790f..54b5f0de6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5741,6 +5741,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) char *name_ptr; int name_len; int is_curr = 0; /* ctx->pos points to the current index? */ + bool emitted; /* FIXME, use a real flag for deciding about the key type */ if (root->fs_info->tree_root == root) @@ -5769,6 +5770,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) if (ret < 0) goto err; + emitted = false; while (1) { leaf = path->nodes[0]; slot = path->slots[0]; @@ -5848,6 +5850,7 @@ skip: if (over) goto nopos; + emitted = true; di_len = btrfs_dir_name_len(leaf, di) + btrfs_dir_data_len(leaf, di) + sizeof(*di); di_cur += di_len; @@ -5860,11 +5863,20 @@ next: if (key_type == BTRFS_DIR_INDEX_KEY) { if (is_curr) ctx->pos++; - ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list); + ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list, &emitted); if (ret) goto nopos; } + /* + * If we haven't emitted any dir entry, we must not touch ctx->pos as + * it was was set to the termination value in previous call. We assume + * that "." and ".." were emitted if we reach this point and set the + * termination value as well for an empty directory. + */ + if (ctx->pos > 2 && !emitted) + goto nopos; + /* Reached end of directory/root. Bump pos past the last item. */ ctx->pos++; @@ -7985,6 +7997,7 @@ static void btrfs_endio_direct_read(struct bio *bio) kfree(dip); + dio_bio->bi_error = bio->bi_error; dio_end_io(dio_bio, bio->bi_error); if (io_bio->end_io) @@ -8030,6 +8043,7 @@ out_test: kfree(dip); + dio_bio->bi_error = bio->bi_error; dio_end_io(dio_bio, bio->bi_error); bio_put(bio); } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index da94138eb..08fd3f0f3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2782,24 +2782,29 @@ out: static struct page *extent_same_get_page(struct inode *inode, pgoff_t index) { struct page *page; - struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; page = grab_cache_page(inode->i_mapping, index); if (!page) - return NULL; + return ERR_PTR(-ENOMEM); if (!PageUptodate(page)) { - if (extent_read_full_page_nolock(tree, page, btrfs_get_extent, - 0)) - return NULL; + int ret; + + ret = btrfs_readpage(NULL, page); + if (ret) + return ERR_PTR(ret); lock_page(page); if (!PageUptodate(page)) { unlock_page(page); page_cache_release(page); - return NULL; + return ERR_PTR(-EIO); + } + if (page->mapping != inode->i_mapping) { + unlock_page(page); + page_cache_release(page); + return ERR_PTR(-EAGAIN); } } - unlock_page(page); return page; } @@ -2811,17 +2816,31 @@ static int gather_extent_pages(struct inode *inode, struct page **pages, pgoff_t index = off >> PAGE_CACHE_SHIFT; for (i = 0; i < num_pages; i++) { +again: pages[i] = extent_same_get_page(inode, index + i); - if (!pages[i]) - return -ENOMEM; + if (IS_ERR(pages[i])) { + int err = PTR_ERR(pages[i]); + + if (err == -EAGAIN) + goto again; + pages[i] = NULL; + return err; + } } return 0; } -static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) +static int lock_extent_range(struct inode *inode, u64 off, u64 len, + bool retry_range_locking) { - /* do any pending delalloc/csum calc on src, one way or - another, and lock file content */ + /* + * Do any pending delalloc/csum calculations on inode, one way or + * another, and lock file content. 
+ * The locking order is: + * + * 1) pages + * 2) range in the inode's io tree + */ while (1) { struct btrfs_ordered_extent *ordered; lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); @@ -2839,8 +2858,11 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); if (ordered) btrfs_put_ordered_extent(ordered); + if (!retry_range_locking) + return -EAGAIN; btrfs_wait_ordered_range(inode, off, len); } + return 0; } static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) @@ -2865,15 +2887,24 @@ static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); } -static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, - struct inode *inode2, u64 loff2, u64 len) +static int btrfs_double_extent_lock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len, + bool retry_range_locking) { + int ret; + if (inode1 < inode2) { swap(inode1, inode2); swap(loff1, loff2); } - lock_extent_range(inode1, loff1, len); - lock_extent_range(inode2, loff2, len); + ret = lock_extent_range(inode1, loff1, len, retry_range_locking); + if (ret) + return ret; + ret = lock_extent_range(inode2, loff2, len, retry_range_locking); + if (ret) + unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, + loff1 + len - 1); + return ret; } struct cmp_pages { @@ -2889,11 +2920,15 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp) for (i = 0; i < cmp->num_pages; i++) { pg = cmp->src_pages[i]; - if (pg) + if (pg) { + unlock_page(pg); page_cache_release(pg); + } pg = cmp->dst_pages[i]; - if (pg) + if (pg) { + unlock_page(pg); page_cache_release(pg); + } } kfree(cmp->src_pages); kfree(cmp->dst_pages); @@ -2954,6 +2989,8 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, src_page = cmp->src_pages[i]; dst_page = cmp->dst_pages[i]; + ASSERT(PageLocked(src_page)); + ASSERT(PageLocked(dst_page)); addr = kmap_atomic(src_page); dst_addr = kmap_atomic(dst_page); @@ -3066,14 +3103,46 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, goto out_unlock; } +again: ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); if (ret) goto out_unlock; if (same_inode) - lock_extent_range(src, same_lock_start, same_lock_len); + ret = lock_extent_range(src, same_lock_start, same_lock_len, + false); else - btrfs_double_extent_lock(src, loff, dst, dst_loff, len); + ret = btrfs_double_extent_lock(src, loff, dst, dst_loff, len, + false); + /* + * If one of the inodes has dirty pages in the respective range or + * ordered extents, we need to flush dellaloc and wait for all ordered + * extents in the range. We must unlock the pages and the ranges in the + * io trees to avoid deadlocks when flushing delalloc (requires locking + * pages) and when waiting for ordered extents to complete (they require + * range locking). + */ + if (ret == -EAGAIN) { + /* + * Ranges in the io trees already unlocked. Now unlock all + * pages before waiting for all IO to complete. 
+ */ + btrfs_cmp_data_free(&cmp); + if (same_inode) { + btrfs_wait_ordered_range(src, same_lock_start, + same_lock_len); + } else { + btrfs_wait_ordered_range(src, loff, len); + btrfs_wait_ordered_range(dst, dst_loff, len); + } + goto again; + } + ASSERT(ret == 0); + if (WARN_ON(ret)) { + /* ranges in the io trees already unlocked */ + btrfs_cmp_data_free(&cmp); + return ret; + } /* pass original length for comparison so we stay within i_size */ ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); @@ -3895,9 +3964,15 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, u64 lock_start = min_t(u64, off, destoff); u64 lock_len = max_t(u64, off, destoff) + len - lock_start; - lock_extent_range(src, lock_start, lock_len); + ret = lock_extent_range(src, lock_start, lock_len, true); } else { - btrfs_double_extent_lock(src, off, inode, destoff, len); + ret = btrfs_double_extent_lock(src, off, inode, destoff, len, + true); + } + ASSERT(ret == 0); + if (WARN_ON(ret)) { + /* ranges in the io trees already unlocked */ + goto out_unlock; } ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a23399e8e..9e084477d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1257,6 +1257,15 @@ int find_free_dev_extent_start(struct btrfs_transaction *transaction, int ret; int slot; struct extent_buffer *l; + u64 min_search_start; + + /* + * We don't want to overwrite the superblock on the drive nor any area + * used by the boot loader (grub for example), so we make sure to start + * at an offset of at least 1MB. + */ + min_search_start = max(root->fs_info->alloc_start, 1024ull * 1024); + search_start = max(search_start, min_search_start); path = btrfs_alloc_path(); if (!path) @@ -1397,18 +1406,9 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 num_bytes, u64 *start, u64 *len) { - struct btrfs_root *root = device->dev_root; - u64 search_start; - /* FIXME use last free of some kind */ - - /* - * we don't want to overwrite the superblock on the drive, - * so we make sure to start at an offset of at least 1MB - */ - search_start = max(root->fs_info->alloc_start, 1024ull * 1024); return find_free_dev_extent_start(trans->transaction, device, - num_bytes, search_start, start, len); + num_bytes, 0, start, len); } static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, @@ -6512,6 +6512,14 @@ int btrfs_read_sys_array(struct btrfs_root *root) goto out_short_read; num_stripes = btrfs_chunk_num_stripes(sb, chunk); + if (!num_stripes) { + printk(KERN_ERR + "BTRFS: invalid number of stripes %u in sys_array at offset %u\n", + num_stripes, cur_offset); + ret = -EIO; + break; + } + len = btrfs_chunk_item_size(num_stripes); if (cur_offset + len > array_size) goto out_short_read; diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 7febcf247..50b268483 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -50,7 +50,7 @@ void cifs_vfs_err(const char *fmt, ...) vaf.fmt = fmt; vaf.va = &args; - pr_err("CIFS VFS: %pV", &vaf); + pr_err_ratelimited("CIFS VFS: %pV", &vaf); va_end(args); } diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h index f40fbaca1..66cf0f9ff 100644 --- a/fs/cifs/cifs_debug.h +++ b/fs/cifs/cifs_debug.h @@ -51,14 +51,13 @@ __printf(1, 2) void cifs_vfs_err(const char *fmt, ...); /* information message: e.g., configuration, major event */ #define cifs_dbg(type, fmt, ...) 
\ do { \ - if (type == FYI) { \ - if (cifsFYI & CIFS_INFO) { \ - pr_debug("%s: " fmt, __FILE__, ##__VA_ARGS__); \ - } \ + if (type == FYI && cifsFYI & CIFS_INFO) { \ + pr_debug_ratelimited("%s: " \ + fmt, __FILE__, ##__VA_ARGS__); \ } else if (type == VFS) { \ cifs_vfs_err(fmt, ##__VA_ARGS__); \ } else if (type == NOISY && type != 0) { \ - pr_debug(fmt, ##__VA_ARGS__); \ + pr_debug_ratelimited(fmt, ##__VA_ARGS__); \ } \ } while (0) diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index afa09fce8..e682b36a2 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -714,7 +714,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL); if (!ses->auth_key.response) { - rc = ENOMEM; + rc = -ENOMEM; ses->auth_key.len = 0; goto setup_ntlmv2_rsp_ret; } diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index ecb0803bd..3c194ff0d 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -368,7 +368,6 @@ cifs_reconnect(struct TCP_Server_Info *server) server->session_key.response = NULL; server->session_key.len = 0; server->lstrp = jiffies; - mutex_unlock(&server->srv_mutex); /* mark submitted MIDs for retry and issue callback */ INIT_LIST_HEAD(&retry_list); @@ -381,6 +380,7 @@ cifs_reconnect(struct TCP_Server_Info *server) list_move(&mid_entry->qhead, &retry_list); } spin_unlock(&GlobalMid_Lock); + mutex_unlock(&server->srv_mutex); cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__); list_for_each_safe(tmp, tmp2, &retry_list) { diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 0557c45e9..b30a4a6d9 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -847,6 +847,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx) * if buggy server returns . and .. late do we want to * check for that here? */ + *tmp_buf = 0; rc = cifs_filldir(current_entry, file, ctx, tmp_buf, max_len); if (rc) { diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 2a24c524f..87abe8ed0 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -576,14 +576,16 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, cifs_in_send_dec(server); cifs_save_when_sent(mid); - if (rc < 0) + if (rc < 0) { server->sequence_number -= 2; + cifs_delete_mid(mid); + } + mutex_unlock(&server->srv_mutex); if (rc == 0) return 0; - cifs_delete_mid(mid); add_credits_and_wake_if(server, credits, optype); return rc; } diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index c35ffdc12..706de324f 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -575,6 +575,26 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx) mutex_unlock(&allocated_ptys_lock); } +/* + * pty code needs to hold extra references in case of last /dev/tty close + */ + +void devpts_add_ref(struct inode *ptmx_inode) +{ + struct super_block *sb = pts_sb_from_inode(ptmx_inode); + + atomic_inc(&sb->s_active); + ihold(ptmx_inode); +} + +void devpts_del_ref(struct inode *ptmx_inode) +{ + struct super_block *sb = pts_sb_from_inode(ptmx_inode); + + iput(ptmx_inode); + deactivate_super(sb); +} + /** * devpts_pty_new -- create a new inode in /dev/pts/ * @ptmx_inode: inode of the master diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index ec0668a60..fe1f50fe7 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -191,7 +191,6 @@ static int ext4_init_block_bitmap(struct super_block *sb, /* If checksum is bad mark all blocks used to prevent allocation * essentially implementing a per-group read-only flag. 
*/ if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { - ext4_error(sb, "Checksum bad for group %u", block_group); grp = ext4_get_group_info(sb, block_group); if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) percpu_counter_sub(&sbi->s_freeclusters_counter, @@ -442,14 +441,16 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) } ext4_lock_group(sb, block_group); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { - err = ext4_init_block_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); ext4_unlock_group(sb, block_group); unlock_buffer(bh); - if (err) + if (err) { + ext4_error(sb, "Failed to init block bitmap for group " + "%u: %d", block_group, err); goto out; + } goto verify; } ext4_unlock_group(sb, block_group); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 1b8024d26..53f2b98a6 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -76,7 +76,6 @@ static int ext4_init_inode_bitmap(struct super_block *sb, /* If checksum is bad mark all blocks and inodes use to prevent * allocation, essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { - ext4_error(sb, "Checksum bad for group %u", block_group); grp = ext4_get_group_info(sb, block_group); if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) percpu_counter_sub(&sbi->s_freeclusters_counter, @@ -191,8 +190,11 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) set_buffer_verified(bh); ext4_unlock_group(sb, block_group); unlock_buffer(bh); - if (err) + if (err) { + ext4_error(sb, "Failed to init inode bitmap for group " + "%u: %d", block_group, err); goto out; + } return bh; } ext4_unlock_group(sb, block_group); diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index fb6f11709..e032a0423 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -265,11 +265,12 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, ext4_lblk_t orig_blk_offset, donor_blk_offset; unsigned long blocksize = orig_inode->i_sb->s_blocksize; unsigned int tmp_data_size, data_size, replaced_size; - int err2, jblocks, retries = 0; + int i, err2, jblocks, retries = 0; int replaced_count = 0; int from = data_offset_in_page << orig_inode->i_blkbits; int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; struct super_block *sb = orig_inode->i_sb; + struct buffer_head *bh = NULL; /* * It needs twice the amount of ordinary journal buffers because @@ -380,8 +381,16 @@ data_copy: } /* Perform all necessary steps similar write_begin()/write_end() * but keeping in mind that i_size will not change */ - *err = __block_write_begin(pagep[0], from, replaced_size, - ext4_get_block); + if (!page_has_buffers(pagep[0])) + create_empty_buffers(pagep[0], 1 << orig_inode->i_blkbits, 0); + bh = page_buffers(pagep[0]); + for (i = 0; i < data_offset_in_page; i++) + bh = bh->b_this_page; + for (i = 0; i < block_len_in_page; i++) { + *err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0); + if (*err < 0) + break; + } if (!*err) *err = block_commit_write(pagep[0], from, from + replaced_size); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index ad62d7acc..34038e359 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -198,7 +198,7 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) if (flex_gd == NULL) goto out3; - if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_flex_group_data)) + if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data)) goto out2; 
flex_gd->count = flexbg_size; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index de4bdfac0..595ebdb41 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -463,6 +463,7 @@ hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end) */ vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) { unsigned long v_offset; + unsigned long v_end; /* * Can the expression below overflow on 32-bit arches? @@ -475,15 +476,17 @@ hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end) else v_offset = 0; - if (end) { - end = ((end - start) << PAGE_SHIFT) + - vma->vm_start + v_offset; - if (end > vma->vm_end) - end = vma->vm_end; - } else - end = vma->vm_end; + if (!end) + v_end = vma->vm_end; + else { + v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT) + + vma->vm_start; + if (v_end > vma->vm_end) + v_end = vma->vm_end; + } - unmap_hugepage_range(vma, vma->vm_start + v_offset, end, NULL); + unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end, + NULL); } } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 03516c808..2a2e2d8dd 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -145,7 +145,7 @@ static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1, return false; for (i = 0; i < m1->fh_versions_cnt; i++) { bool found_fh = false; - for (j = 0; j < m2->fh_versions_cnt; i++) { + for (j = 0; j < m2->fh_versions_cnt; j++) { if (nfs_compare_fh(&m1->fh_versions[i], &m2->fh_versions[j]) == 0) { found_fh = true; @@ -1859,11 +1859,9 @@ ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo, start = xdr_reserve_space(xdr, 4); BUG_ON(!start); - if (ff_layout_encode_ioerr(flo, xdr, args)) - goto out; - + ff_layout_encode_ioerr(flo, xdr, args); ff_layout_encode_iostats(flo, xdr, args); -out: + *start = cpu_to_be32((xdr->p - start - 1) * 4); dprintk("%s: Return\n", __func__); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c7e8b87da..3e2071a17 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1641,6 +1641,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) unsigned long invalid = 0; unsigned long now = jiffies; unsigned long save_cache_validity; + bool cache_revalidated = true; dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n", __func__, inode->i_sb->s_id, inode->i_ino, @@ -1702,22 +1703,28 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_force_lookup_revalidate(inode); inode->i_version = fattr->change_attr; } - } else + } else { nfsi->cache_validity |= save_cache_validity; + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_MTIME) { memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); - } else if (server->caps & NFS_CAP_MTIME) + } else if (server->caps & NFS_CAP_MTIME) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_CTIME) { memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); - } else if (server->caps & NFS_CAP_CTIME) + } else if (server->caps & NFS_CAP_CTIME) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } /* Check if our cached file size is stale */ if (fattr->valid & NFS_ATTR_FATTR_SIZE) { @@ -1737,19 +1744,23 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) (long long)cur_isize, (long long)new_isize); } - } 
else + } else { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_ATIME) memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); - else if (server->caps & NFS_CAP_ATIME) + else if (server->caps & NFS_CAP_ATIME) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATIME | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_MODE) { if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { @@ -1758,36 +1769,42 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_mode = newmode; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; } - } else if (server->caps & NFS_CAP_MODE) + } else if (server->caps & NFS_CAP_MODE) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_OWNER) { if (!uid_eq(inode->i_uid, fattr->uid)) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_uid = fattr->uid; } - } else if (server->caps & NFS_CAP_OWNER) + } else if (server->caps & NFS_CAP_OWNER) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_GROUP) { if (!gid_eq(inode->i_gid, fattr->gid)) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_gid = fattr->gid; } - } else if (server->caps & NFS_CAP_OWNER_GROUP) + } else if (server->caps & NFS_CAP_OWNER_GROUP) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_NLINK) { if (inode->i_nlink != fattr->nlink) { @@ -1796,19 +1813,22 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid |= NFS_INO_INVALID_DATA; set_nlink(inode, fattr->nlink); } - } else if (server->caps & NFS_CAP_NLINK) + } else if (server->caps & NFS_CAP_NLINK) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { /* * report the blocks in 512byte units */ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); - } - if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) + } else if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; + else + cache_revalidated = false; /* Update attrtimeo value if we're out of the unstable period */ if (invalid & NFS_INO_INVALID_ATTR) { @@ -1818,9 +1838,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* Set barrier to be more recent than all outstanding updates */ nfsi->attr_gencount = nfs_inc_attr_generation_counter(); } else { - if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { - if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) - nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); + if (cache_revalidated) { + if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, + nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { + nfsi->attrtimeo <<= 1; + if (nfsi->attrtimeo > NFS_MAXATTRTIMEO(inode)) + nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); + 
} nfsi->attrtimeo_timestamp = now; } /* Set the barrier to be more recent than this fattr */ @@ -1829,7 +1853,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } /* Don't declare attrcache up to date if there were no attrs! */ - if (fattr->valid != 0) + if (cache_revalidated) invalid &= ~NFS_INO_INVALID_ATTR; /* Don't invalidate the data if we were to blame */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 343b0f1f1..f496ed721 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1385,6 +1385,7 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s * Protect the call to nfs4_state_set_mode_locked and * serialise the stateid update */ + spin_lock(&state->owner->so_lock); write_seqlock(&state->seqlock); if (deleg_stateid != NULL) { nfs4_stateid_copy(&state->stateid, deleg_stateid); @@ -1393,7 +1394,6 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s if (open_stateid != NULL) nfs_set_open_stateid_locked(state, open_stateid, fmode); write_sequnlock(&state->seqlock); - spin_lock(&state->owner->so_lock); update_open_stateflags(state, fmode); spin_unlock(&state->owner->so_lock); } diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 0a8983492..eff6319d5 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -22,9 +22,9 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new) { - ssize_t list_size, size; - char *buf, *name, *value; - int error; + ssize_t list_size, size, value_size = 0; + char *buf, *name, *value = NULL; + int uninitialized_var(error); if (!old->d_inode->i_op->getxattr || !new->d_inode->i_op->getxattr) @@ -41,29 +41,40 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new) if (!buf) return -ENOMEM; - error = -ENOMEM; - value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL); - if (!value) - goto out; - list_size = vfs_listxattr(old, buf, list_size); if (list_size <= 0) { error = list_size; - goto out_free_value; + goto out; } for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { - size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX); - if (size <= 0) { +retry: + size = vfs_getxattr(old, name, value, value_size); + if (size == -ERANGE) + size = vfs_getxattr(old, name, NULL, 0); + + if (size < 0) { error = size; - goto out_free_value; + break; + } + + if (size > value_size) { + void *new; + + new = krealloc(value, size, GFP_KERNEL); + if (!new) { + error = -ENOMEM; + break; + } + value = new; + value_size = size; + goto retry; } + error = vfs_setxattr(new, name, value, size, 0); if (error) - goto out_free_value; + break; } - -out_free_value: kfree(value); out: kfree(buf); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 4060ffde8..b29036aa8 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -42,6 +42,19 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) int err; struct dentry *upperdentry; + /* + * Check for permissions before trying to copy-up. This is redundant + * since it will be rechecked later by ->setattr() on upper dentry. But + * without this, copy-up can be triggered by just about anybody. + * + * We don't initialize inode->size, which just means that + * inode_newsize_ok() will always check against MAX_LFS_FILESIZE and not + * check for a swapfile (which this won't be anyway). 
+ */ + err = inode_change_ok(dentry->d_inode, attr); + if (err) + return err; + err = ovl_want_write(dentry); if (err) goto out; diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 70e9af551..adcb1398c 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -571,7 +571,8 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) (int) PTR_ERR(dentry)); continue; } - ovl_cleanup(upper->d_inode, dentry); + if (dentry->d_inode) + ovl_cleanup(upper->d_inode, dentry); dput(dentry); } mutex_unlock(&upper->d_inode->i_mutex); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index e38ee0fed..f42c9407f 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -9,6 +9,7 @@ #include <linux/fs.h> #include <linux/namei.h> +#include <linux/pagemap.h> #include <linux/xattr.h> #include <linux/security.h> #include <linux/mount.h> @@ -910,6 +911,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) } sb->s_stack_depth = 0; + sb->s_maxbytes = MAX_LFS_FILESIZE; if (ufs->config.upperdir) { if (!ufs->config.workdir) { pr_err("overlayfs: missing 'workdir'\n"); @@ -1053,6 +1055,9 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) root_dentry->d_fsdata = oe; + ovl_copyattr(ovl_dentry_real(root_dentry)->d_inode, + root_dentry->d_inode); + sb->s_magic = OVERLAYFS_SUPER_MAGIC; sb->s_op = &ovl_super_operations; sb->s_root = root_dentry; diff --git a/fs/proc/array.c b/fs/proc/array.c index d73291f5f..b6c00ce0e 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -395,7 +395,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, state = *get_task_state(task); vsize = eip = esp = 0; - permitted = ptrace_may_access(task, PTRACE_MODE_READ | PTRACE_MODE_NOAUDIT); + permitted = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS | PTRACE_MODE_NOAUDIT); mm = get_task_mm(task); if (mm) { vsize = task_vsize(mm); diff --git a/fs/proc/base.c b/fs/proc/base.c index aa41f2a7b..8be0e4cd2 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -403,7 +403,7 @@ static const struct file_operations proc_pid_cmdline_ops = { static int proc_pid_auxv(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { - struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ); + struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); if (mm && !IS_ERR(mm)) { unsigned int nwords = 0; do { @@ -430,7 +430,8 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, wchan = get_wchan(task); - if (wchan && ptrace_may_access(task, PTRACE_MODE_READ) && !lookup_symbol_name(wchan, symname)) + if (wchan && ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS) + && !lookup_symbol_name(wchan, symname)) seq_printf(m, "%s", symname); else seq_putc(m, '0'); @@ -444,7 +445,7 @@ static int lock_trace(struct task_struct *task) int err = mutex_lock_killable(&task->signal->cred_guard_mutex); if (err) return err; - if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) { + if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) { mutex_unlock(&task->signal->cred_guard_mutex); return -EPERM; } @@ -697,7 +698,7 @@ static int proc_fd_access_allowed(struct inode *inode) */ task = get_proc_task(inode); if (task) { - allowed = ptrace_may_access(task, PTRACE_MODE_READ); + allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); put_task_struct(task); } return allowed; @@ -732,7 +733,7 @@ static bool has_pid_permissions(struct pid_namespace *pid, return true; if (in_group_p(pid->pid_gid)) return true; - 
return ptrace_may_access(task, PTRACE_MODE_READ); + return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); } @@ -809,7 +810,7 @@ struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) struct mm_struct *mm = ERR_PTR(-ESRCH); if (task) { - mm = mm_access(task, mode); + mm = mm_access(task, mode | PTRACE_MODE_FSCREDS); put_task_struct(task); if (!IS_ERR_OR_NULL(mm)) { @@ -1856,7 +1857,7 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) if (!task) goto out_notask; - mm = mm_access(task, PTRACE_MODE_READ); + mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); if (IS_ERR_OR_NULL(mm)) goto out; @@ -2007,7 +2008,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, goto out; result = -EACCES; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) + if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) goto out_put_task; result = -ENOENT; @@ -2060,7 +2061,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) goto out; ret = -EACCES; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) + if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) goto out_put_task; ret = 0; @@ -2530,7 +2531,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh if (result) return result; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) { + if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) { result = -EACCES; goto out_unlock; } diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index f6e8354b8..1b0ea4a5d 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -42,7 +42,7 @@ static const char *proc_ns_follow_link(struct dentry *dentry, void **cookie) if (!task) return error; - if (ptrace_may_access(task, PTRACE_MODE_READ)) { + if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) { error = ns_get_path(&ns_path, task, ns_ops); if (!error) nd_jump_link(&ns_path); @@ -63,7 +63,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl if (!task) return res; - if (ptrace_may_access(task, PTRACE_MODE_READ)) { + if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) { res = ns_get_name(name, sizeof(name), task, ns_ops); if (res >= 0) res = readlink_copy(buffer, buflen, name); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index e03793e75..e4733feb0 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1476,18 +1476,19 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { + pte_t huge_pte = huge_ptep_get(pte); struct numa_maps *md; struct page *page; - if (!pte_present(*pte)) + if (!pte_present(huge_pte)) return 0; - page = pte_page(*pte); + page = pte_page(huge_pte); if (!page) return 0; md = walk->private; - gather_stats(page, md, pte_dirty(*pte), 1); + gather_stats(page, md, pte_dirty(huge_pte), 1); return 0; } diff --git a/fs/timerfd.c b/fs/timerfd.c index b94fa6c3c..053818dd6 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -153,7 +153,7 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) if (isalarm(ctx)) remaining = alarm_expires_remaining(&ctx->t.alarm); else - remaining = hrtimer_expires_remaining(&ctx->t.tmr); + remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr); return remaining.tv64 < 0 ? 
ktime_set(0, 0): remaining; } diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 8d0b3ade0..566df9b5a 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -2047,14 +2047,29 @@ void udf_write_aext(struct inode *inode, struct extent_position *epos, epos->offset += adsize; } +/* + * Only 1 indirect extent in a row really makes sense but allow upto 16 in case + * someone does some weird stuff. + */ +#define UDF_MAX_INDIR_EXTS 16 + int8_t udf_next_aext(struct inode *inode, struct extent_position *epos, struct kernel_lb_addr *eloc, uint32_t *elen, int inc) { int8_t etype; + unsigned int indirections = 0; while ((etype = udf_current_aext(inode, epos, eloc, elen, inc)) == (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) { int block; + + if (++indirections > UDF_MAX_INDIR_EXTS) { + udf_err(inode->i_sb, + "too many indirect extents in inode %lu\n", + inode->i_ino); + return -1; + } + epos->block = *eloc; epos->offset = sizeof(struct allocExtDesc); brelse(epos->bh); diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index ab478e62b..e788a05aa 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -128,11 +128,15 @@ int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i) if (c < 0x80U) utf_o->u_name[utf_o->u_len++] = (uint8_t)c; else if (c < 0x800U) { + if (utf_o->u_len > (UDF_NAME_LEN - 4)) + break; utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xc0 | (c >> 6)); utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | (c & 0x3f)); } else { + if (utf_o->u_len > (UDF_NAME_LEN - 5)) + break; utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xe0 | (c >> 12)); utf_o->u_name[utf_o->u_len++] = @@ -173,17 +177,22 @@ int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i) static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length) { unsigned c, i, max_val, utf_char; - int utf_cnt, u_len; + int utf_cnt, u_len, u_ch; memset(ocu, 0, sizeof(dstring) * length); ocu[0] = 8; max_val = 0xffU; + u_ch = 1; try_again: u_len = 0U; utf_char = 0U; utf_cnt = 0U; for (i = 0U; i < utf->u_len; i++) { + /* Name didn't fit? */ + if (u_len + 1 + u_ch >= length) + return 0; + c = (uint8_t)utf->u_name[i]; /* Complete a multi-byte UTF-8 character */ @@ -225,6 +234,7 @@ try_again: if (max_val == 0xffU) { max_val = 0xffffU; ocu[0] = (uint8_t)0x10U; + u_ch = 2; goto try_again; } goto error_out; @@ -277,7 +287,7 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, c = (c << 8) | ocu[i++]; len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len], - UDF_NAME_LEN - utf_o->u_len); + UDF_NAME_LEN - 2 - utf_o->u_len); /* Valid character? */ if (len >= 0) utf_o->u_len += len; @@ -295,15 +305,19 @@ static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, int len; unsigned i, max_val; uint16_t uni_char; - int u_len; + int u_len, u_ch; memset(ocu, 0, sizeof(dstring) * length); ocu[0] = 8; max_val = 0xffU; + u_ch = 1; try_again: u_len = 0U; for (i = 0U; i < uni->u_len; i++) { + /* Name didn't fit? 
*/ + if (u_len + 1 + u_ch >= length) + return 0; len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char); if (!len) continue; @@ -316,6 +330,7 @@ try_again: if (uni_char > max_val) { max_val = 0xffffU; ocu[0] = (uint8_t)0x10U; + u_ch = 2; goto try_again; } diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 8774498ce..e2536bb1c 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -786,7 +786,7 @@ typedef struct xfs_agfl { __be64 agfl_lsn; __be32 agfl_crc; __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */ -} xfs_agfl_t; +} __attribute__((packed)) xfs_agfl_t; #define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc) diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 268c00f4f..65485cfc4 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -62,11 +62,12 @@ xfs_inobp_check( * has not had the inode cores stamped into it. Hence for readahead, the buffer * may be potentially invalid. * - * If the readahead buffer is invalid, we don't want to mark it with an error, - * but we do want to clear the DONE status of the buffer so that a followup read - * will re-read it from disk. This will ensure that we don't get an unnecessary - * warnings during log recovery and we don't get unnecssary panics on debug - * kernels. + * If the readahead buffer is invalid, we need to mark it with an error and + * clear the DONE status of the buffer so that a followup read will re-read it + * from disk. We don't report the error otherwise to avoid warnings during log + * recovery and we don't get unnecssary panics on debug kernels. We use EIO here + * because all we want to do is say readahead failed; there is no-one to report + * the error to, so this will distinguish it from a non-ra verifier failure. */ static void xfs_inode_buf_verify( @@ -93,6 +94,7 @@ xfs_inode_buf_verify( XFS_RANDOM_ITOBP_INOTOBP))) { if (readahead) { bp->b_flags &= ~XBF_DONE; + xfs_buf_ioerror(bp, -EIO); return; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 3243cdf97..39090fc56 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -604,6 +604,13 @@ found: } } + /* + * Clear b_error if this is a lookup from a caller that doesn't expect + * valid data to be found in the buffer. + */ + if (!(flags & XBF_READ)) + xfs_buf_ioerror(bp, 0); + XFS_STATS_INC(target->bt_mount, xb_get); trace_xfs_buf_get(bp, flags, _RET_IP_); return bp; @@ -1520,6 +1527,16 @@ xfs_wait_buftarg( LIST_HEAD(dispose); int loop = 0; + /* + * We need to flush the buffer workqueue to ensure that all IO + * completion processing is 100% done. Just waiting on buffer locks is + * not sufficient for async IO as the reference count held over IO is + * not released until after the buffer lock is dropped. Hence we need to + * ensure here that all reference counts have been dropped before we + * start walking the LRU list. + */ + drain_workqueue(btp->bt_mount->m_buf_workqueue); + /* loop until there is nothing left on the lru list. */ while (list_lru_count(&btp->bt_lru)) { list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele, |