From 394569928e2f17dff4ae367ac700048138e318c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Fabian=20Silva=20Delgado?= Date: Sat, 23 Apr 2016 04:02:05 -0300 Subject: Linux-libre 4.5.2-gnu --- fs/btrfs/file.c | 2 +- fs/btrfs/tree-log.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/dcache.c | 5 +- fs/drop_caches.c | 4 +- fs/ext4/crypto.c | 12 +++-- fs/ext4/ext4.h | 23 +++++++++ fs/ext4/file.c | 12 +++-- fs/ext4/move_extent.c | 11 +++- fs/ext4/super.c | 47 ++++++++++++----- fs/nfs/dir.c | 6 +-- fs/nfs/inode.c | 2 +- fs/nfs/nfs4file.c | 4 +- fs/overlayfs/super.c | 33 ++++++++++++ fs/super.c | 47 +++++++++++++++++ 14 files changed, 313 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 098bb8f69..9a30ca640 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1883,7 +1883,7 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end) */ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { - struct dentry *dentry = file->f_path.dentry; + struct dentry *dentry = file_dentry(file); struct inode *inode = d_inode(dentry); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 978c3a810..849a30aa1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4414,6 +4414,127 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, return ret; } +/* + * When we are logging a new inode X, check if it doesn't have a reference that + * matches the reference from some other inode Y created in a past transaction + * and that was renamed in the current transaction. If we don't do this, then at + * log replay time we can lose inode Y (and all its files if it's a directory): + * + * mkdir /mnt/x + * echo "hello world" > /mnt/x/foobar + * sync + * mv /mnt/x /mnt/y + * mkdir /mnt/x # or touch /mnt/x + * xfs_io -c fsync /mnt/x + * + * mount fs, trigger log replay + * + * After the log replay procedure, we would lose the first directory and all its + * files (file foobar). + * For the case where inode Y is not a directory we simply end up losing it: + * + * echo "123" > /mnt/foo + * sync + * mv /mnt/foo /mnt/bar + * echo "abc" > /mnt/foo + * xfs_io -c fsync /mnt/foo + * + * + * We also need this for cases where a snapshot entry is replaced by some other + * entry (file or directory) otherwise we end up with an unreplayable log due to + * attempts to delete the snapshot entry (entry of type BTRFS_ROOT_ITEM_KEY) as + * if it were a regular entry: + * + * mkdir /mnt/x + * btrfs subvolume snapshot /mnt /mnt/x/snap + * btrfs subvolume delete /mnt/x/snap + * rmdir /mnt/x + * mkdir /mnt/x + * fsync /mnt/x or fsync some new file inside it + * + * + * The snapshot delete, rmdir of x, mkdir of a new x and the fsync all happen in + * the same transaction. + */ +static int btrfs_check_ref_name_override(struct extent_buffer *eb, + const int slot, + const struct btrfs_key *key, + struct inode *inode) +{ + int ret; + struct btrfs_path *search_path; + char *name = NULL; + u32 name_len = 0; + u32 item_size = btrfs_item_size_nr(eb, slot); + u32 cur_offset = 0; + unsigned long ptr = btrfs_item_ptr_offset(eb, slot); + + search_path = btrfs_alloc_path(); + if (!search_path) + return -ENOMEM; + search_path->search_commit_root = 1; + search_path->skip_locking = 1; + + while (cur_offset < item_size) { + u64 parent; + u32 this_name_len; + u32 this_len; + unsigned long name_ptr; + struct btrfs_dir_item *di; + + if (key->type == BTRFS_INODE_REF_KEY) { + struct btrfs_inode_ref *iref; + + iref = (struct btrfs_inode_ref *)(ptr + cur_offset); + parent = key->offset; + this_name_len = btrfs_inode_ref_name_len(eb, iref); + name_ptr = (unsigned long)(iref + 1); + this_len = sizeof(*iref) + this_name_len; + } else { + struct btrfs_inode_extref *extref; + + extref = (struct btrfs_inode_extref *)(ptr + + cur_offset); + parent = btrfs_inode_extref_parent(eb, extref); + this_name_len = btrfs_inode_extref_name_len(eb, extref); + name_ptr = (unsigned long)&extref->name; + this_len = sizeof(*extref) + this_name_len; + } + + if (this_name_len > name_len) { + char *new_name; + + new_name = krealloc(name, this_name_len, GFP_NOFS); + if (!new_name) { + ret = -ENOMEM; + goto out; + } + name_len = this_name_len; + name = new_name; + } + + read_extent_buffer(eb, name, name_ptr, this_name_len); + di = btrfs_lookup_dir_item(NULL, BTRFS_I(inode)->root, + search_path, parent, + name, this_name_len, 0); + if (di && !IS_ERR(di)) { + ret = 1; + goto out; + } else if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out; + } + btrfs_release_path(search_path); + + cur_offset += this_len; + } + ret = 0; +out: + btrfs_free_path(search_path); + kfree(name); + return ret; +} + /* log a single inode in the tree log. * At least one parent directory for this inode must exist in the tree * or be logged already. @@ -4586,6 +4707,22 @@ again: if (min_key.type == BTRFS_INODE_ITEM_KEY) need_log_inode_item = false; + if ((min_key.type == BTRFS_INODE_REF_KEY || + min_key.type == BTRFS_INODE_EXTREF_KEY) && + BTRFS_I(inode)->generation == trans->transid) { + ret = btrfs_check_ref_name_override(path->nodes[0], + path->slots[0], + &min_key, inode); + if (ret < 0) { + err = ret; + goto out_unlock; + } else if (ret > 0) { + err = 1; + btrfs_set_log_full_commit(root->fs_info, trans); + goto out_unlock; + } + } + /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */ if (min_key.type == BTRFS_XATTR_ITEM_KEY) { if (ins_nr == 0) diff --git a/fs/dcache.c b/fs/dcache.c index 72d3cc89c..6bdabe2fc 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1668,7 +1668,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE | DCACHE_OP_DELETE | - DCACHE_OP_SELECT_INODE)); + DCACHE_OP_SELECT_INODE | + DCACHE_OP_REAL)); dentry->d_op = op; if (!op) return; @@ -1686,6 +1687,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) dentry->d_flags |= DCACHE_OP_PRUNE; if (op->d_select_inode) dentry->d_flags |= DCACHE_OP_SELECT_INODE; + if (op->d_real) + dentry->d_flags |= DCACHE_OP_REAL; } EXPORT_SYMBOL(d_set_d_op); diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 4f591f190..d7ca3d3a9 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -14,6 +14,8 @@ /* A global variable is a bit ugly, but it keeps the code simple */ int sysctl_drop_caches; +extern void iterate_supers_no_sb_lock(void (*f)(struct super_block *, void *), void *arg); + static void drop_pagecache_sb(struct super_block *sb, void *unused) { struct inode *inode, *toput_inode = NULL; @@ -43,7 +45,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) /* For TuxOnIce */ void drop_pagecache(void) { - iterate_supers(drop_pagecache_sb, NULL); + iterate_supers_no_sb_lock(drop_pagecache_sb, NULL); } int drop_caches_sysctl_handler(struct ctl_table *table, int write, diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index 38f756248..ecb543944 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -475,13 +475,16 @@ uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size) */ static int ext4_d_revalidate(struct dentry *dentry, unsigned int flags) { - struct inode *dir = d_inode(dentry->d_parent); - struct ext4_crypt_info *ci = EXT4_I(dir)->i_crypt_info; + struct dentry *dir; + struct ext4_crypt_info *ci; int dir_has_key, cached_with_key; - if (!ext4_encrypted_inode(dir)) + dir = dget_parent(dentry); + if (!ext4_encrypted_inode(d_inode(dir))) { + dput(dir); return 0; - + } + ci = EXT4_I(d_inode(dir))->i_crypt_info; if (ci && ci->ci_keyring_key && (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | (1 << KEY_FLAG_REVOKED) | @@ -491,6 +494,7 @@ static int ext4_d_revalidate(struct dentry *dentry, unsigned int flags) /* this should eventually be an flag in d_flags */ cached_with_key = dentry->d_fsdata != NULL; dir_has_key = (ci != NULL); + dput(dir); /* * If the dentry was cached without the key, and it is a diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 157b458a6..b213449a5 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -899,6 +899,29 @@ do { \ #include "extents_status.h" +/* + * Lock subclasses for i_data_sem in the ext4_inode_info structure. + * + * These are needed to avoid lockdep false positives when we need to + * allocate blocks to the quota inode during ext4_map_blocks(), while + * holding i_data_sem for a normal (non-quota) inode. Since we don't + * do quota tracking for the quota inode, this avoids deadlock (as + * well as infinite recursion, since it isn't turtles all the way + * down...) + * + * I_DATA_SEM_NORMAL - Used for most inodes + * I_DATA_SEM_OTHER - Used by move_inode.c for the second normal inode + * where the second inode has larger inode number + * than the first + * I_DATA_SEM_QUOTA - Used for quota inodes only + */ +enum { + I_DATA_SEM_NORMAL = 0, + I_DATA_SEM_OTHER, + I_DATA_SEM_QUOTA, +}; + + /* * fourth extended file system inode data in memory */ diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 4cd318f31..38847f38b 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -335,7 +335,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) struct super_block *sb = inode->i_sb; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct vfsmount *mnt = filp->f_path.mnt; - struct inode *dir = filp->f_path.dentry->d_parent->d_inode; + struct dentry *dir; struct path path; char buf[64], *cp; int ret; @@ -379,14 +379,18 @@ static int ext4_file_open(struct inode * inode, struct file * filp) if (ext4_encryption_info(inode) == NULL) return -ENOKEY; } - if (ext4_encrypted_inode(dir) && - !ext4_is_child_context_consistent_with_parent(dir, inode)) { + + dir = dget_parent(file_dentry(filp)); + if (ext4_encrypted_inode(d_inode(dir)) && + !ext4_is_child_context_consistent_with_parent(d_inode(dir), inode)) { ext4_warning(inode->i_sb, "Inconsistent encryption contexts: %lu/%lu\n", - (unsigned long) dir->i_ino, + (unsigned long) d_inode(dir)->i_ino, (unsigned long) inode->i_ino); + dput(dir); return -EPERM; } + dput(dir); /* * Set up the jbd2_inode if we are opening the inode for * writing and the journal is present diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 4098acc70..796ff0eaf 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -60,10 +60,10 @@ ext4_double_down_write_data_sem(struct inode *first, struct inode *second) { if (first < second) { down_write(&EXT4_I(first)->i_data_sem); - down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); + down_write_nested(&EXT4_I(second)->i_data_sem, I_DATA_SEM_OTHER); } else { down_write(&EXT4_I(second)->i_data_sem); - down_write_nested(&EXT4_I(first)->i_data_sem, SINGLE_DEPTH_NESTING); + down_write_nested(&EXT4_I(first)->i_data_sem, I_DATA_SEM_OTHER); } } @@ -484,6 +484,13 @@ mext_check_arguments(struct inode *orig_inode, return -EBUSY; } + if (IS_NOQUOTA(orig_inode) || IS_NOQUOTA(donor_inode)) { + ext4_debug("ext4 move extent: The argument files should " + "not be quota files [ino:orig %lu, donor %lu]\n", + orig_inode->i_ino, donor_inode->i_ino); + return -EBUSY; + } + /* Ext4 move extent supports only extent based file */ if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { ext4_debug("ext4 move extent: orig file is not extents " diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3ed01ec01..a76ca677f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1324,9 +1324,9 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) return -1; } if (ext4_has_feature_quota(sb)) { - ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options " - "when QUOTA feature is enabled"); - return -1; + ext4_msg(sb, KERN_INFO, "Journaled quota options " + "ignored when QUOTA feature is enabled"); + return 1; } qname = match_strdup(args); if (!qname) { @@ -1689,10 +1689,10 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, return -1; } if (ext4_has_feature_quota(sb)) { - ext4_msg(sb, KERN_ERR, - "Cannot set journaled quota options " + ext4_msg(sb, KERN_INFO, + "Quota format mount options ignored " "when QUOTA feature is enabled"); - return -1; + return 1; } sbi->s_jquota_fmt = m->mount_opt; #endif @@ -1753,11 +1753,11 @@ static int parse_options(char *options, struct super_block *sb, #ifdef CONFIG_QUOTA if (ext4_has_feature_quota(sb) && (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) { - ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA " - "feature is enabled"); - return 0; - } - if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { + ext4_msg(sb, KERN_INFO, "Quota feature enabled, usrquota and grpquota " + "mount options ignored."); + clear_opt(sb, USRQUOTA); + clear_opt(sb, GRPQUOTA); + } else if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) clear_opt(sb, USRQUOTA); @@ -5021,6 +5021,20 @@ static int ext4_quota_on_mount(struct super_block *sb, int type) EXT4_SB(sb)->s_jquota_fmt, type); } +static void lockdep_set_quota_inode(struct inode *inode, int subclass) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + + /* The first argument of lockdep_set_subclass has to be + * *exactly* the same as the argument to init_rwsem() --- in + * this case, in init_once() --- or lockdep gets unhappy + * because the name of the lock is set using the + * stringification of the argument to init_rwsem(). + */ + (void) ei; /* shut up clang warning if !CONFIG_LOCKDEP */ + lockdep_set_subclass(&ei->i_data_sem, subclass); +} + /* * Standard function to be called on quota_on */ @@ -5060,8 +5074,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, if (err) return err; } - - return dquot_quota_on(sb, type, format_id, path); + lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA); + err = dquot_quota_on(sb, type, format_id, path); + if (err) + lockdep_set_quota_inode(path->dentry->d_inode, + I_DATA_SEM_NORMAL); + return err; } static int ext4_quota_enable(struct super_block *sb, int type, int format_id, @@ -5088,8 +5106,11 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, /* Don't account quota for quota files to avoid recursion */ qf_inode->i_flags |= S_NOQUOTA; + lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA); err = dquot_enable(qf_inode, type, format_id, flags); iput(qf_inode); + if (err) + lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL); return err; } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 9cce67043..7ded17764 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -377,7 +377,7 @@ int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc, again: timestamp = jiffies; gencount = nfs_inc_attr_generation_counter(); - error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, entry->cookie, pages, + error = NFS_PROTO(inode)->readdir(file_dentry(file), cred, entry->cookie, pages, NFS_SERVER(inode)->dtsize, desc->plus); if (error < 0) { /* We requested READDIRPLUS, but the server doesn't grok it */ @@ -560,7 +560,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en count++; if (desc->plus != 0) - nfs_prime_dcache(desc->file->f_path.dentry, entry); + nfs_prime_dcache(file_dentry(desc->file), entry); status = nfs_readdir_add_to_array(entry, page); if (status != 0) @@ -864,7 +864,7 @@ static bool nfs_dir_mapping_need_revalidate(struct inode *dir) */ static int nfs_readdir(struct file *file, struct dir_context *ctx) { - struct dentry *dentry = file->f_path.dentry; + struct dentry *dentry = file_dentry(file); struct inode *inode = d_inode(dentry); nfs_readdir_descriptor_t my_desc, *desc = &my_desc; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 86faecf8f..847b678af 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -940,7 +940,7 @@ int nfs_open(struct inode *inode, struct file *filp) { struct nfs_open_context *ctx; - ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode); + ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode); if (IS_ERR(ctx)) return PTR_ERR(ctx); nfs_file_set_open_context(filp, ctx); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 57ca1c803..2a9ff14cf 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -26,7 +26,7 @@ static int nfs4_file_open(struct inode *inode, struct file *filp) { struct nfs_open_context *ctx; - struct dentry *dentry = filp->f_path.dentry; + struct dentry *dentry = file_dentry(filp); struct dentry *parent = NULL; struct inode *dir; unsigned openflags = filp->f_flags; @@ -57,7 +57,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) parent = dget_parent(dentry); dir = d_inode(parent); - ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode); + ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode); err = PTR_ERR(ctx); if (IS_ERR(ctx)) goto out; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 619ad4b01..4399ea804 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -295,6 +295,37 @@ static void ovl_dentry_release(struct dentry *dentry) } } +static struct dentry *ovl_d_real(struct dentry *dentry, struct inode *inode) +{ + struct dentry *real; + + if (d_is_dir(dentry)) { + if (!inode || inode == d_inode(dentry)) + return dentry; + goto bug; + } + + real = ovl_dentry_upper(dentry); + if (real && (!inode || inode == d_inode(real))) + return real; + + real = ovl_dentry_lower(dentry); + if (!real) + goto bug; + + if (!inode || inode == d_inode(real)) + return real; + + /* Handle recursion */ + if (real->d_flags & DCACHE_OP_REAL) + return real->d_op->d_real(real, inode); + +bug: + WARN(1, "ovl_d_real(%pd4, %s:%lu\n): real dentry not found\n", dentry, + inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0); + return dentry; +} + static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) { struct ovl_entry *oe = dentry->d_fsdata; @@ -339,11 +370,13 @@ static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) static const struct dentry_operations ovl_dentry_operations = { .d_release = ovl_dentry_release, .d_select_inode = ovl_d_select_inode, + .d_real = ovl_d_real, }; static const struct dentry_operations ovl_reval_dentry_operations = { .d_release = ovl_dentry_release, .d_select_inode = ovl_d_select_inode, + .d_real = ovl_d_real, .d_revalidate = ovl_dentry_revalidate, .d_weak_revalidate = ovl_dentry_weak_revalidate, }; diff --git a/fs/super.c b/fs/super.c index ad57841f7..3446dc4fb 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1372,3 +1372,50 @@ out: return 0; } EXPORT_SYMBOL(thaw_super); + +/** + * lock_supers -- Stop other processes from modifying the list of super blocks. + */ +void take_super_lock() { + spin_lock(&sb_lock); +} + +/** + * unlock_supers -- Allow other processes to again modify the list of super blocks. + */ +void release_super_lock() { + spin_unlock(&sb_lock); +} + +/** + * iterate_supers_no_sb_lock - call function for all active superblocks without using sb_lock + * + * Note that the callback must work with sb_lock taken and not unlock it. + * + * @f: function to call + * @arg: argument to pass to it + * + * Scans the superblock list and calls given function, passing it + * locked superblock and given argument. + */ +void iterate_supers_no_sb_lock(void (*f)(struct super_block *, void *), void *arg) +{ + struct super_block *sb, *p = NULL; + + list_for_each_entry(sb, &super_blocks, s_list) { + if (hlist_unhashed(&sb->s_instances)) + continue; + sb->s_count++; + + down_read(&sb->s_umount); + if (sb->s_root && (sb->s_flags & MS_BORN)) + f(sb, arg); + up_read(&sb->s_umount); + + if (p) + __put_super(p); + p = sb; + } + if (p) + __put_super(p); +} -- cgit v1.2.3