From f3a16ba6a1152b8966dcadc668af4cf00623c7b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Fabian=20Silva=20Delgado?= Date: Wed, 16 Dec 2015 14:55:49 -0300 Subject: Linux-libre 4.3.3-gnu --- fs/btrfs/ctree.h | 4 +- fs/btrfs/delayed-ref.c | 139 ++++++++++++++++++++++---- fs/btrfs/delayed-ref.h | 7 +- fs/btrfs/extent-tree.c | 59 ++++++------ fs/btrfs/file.c | 36 ++++--- fs/btrfs/inode.c | 96 ++++++++++++++---- fs/btrfs/ioctl.c | 257 +++++++++++++++++++++++++++++-------------------- fs/btrfs/relocation.c | 16 ++- fs/btrfs/send.c | 10 +- fs/btrfs/tree-log.c | 2 +- fs/btrfs/xattr.c | 4 +- fs/ceph/mds_client.c | 2 +- fs/debugfs/inode.c | 6 +- fs/ext4/crypto.c | 23 ++++- fs/ext4/ext4_jbd2.c | 6 +- fs/ext4/extents.c | 3 + fs/ext4/page-io.c | 5 +- fs/ext4/super.c | 12 ++- fs/jbd2/journal.c | 6 +- fs/nfs/inode.c | 6 +- fs/nfs/nfs4client.c | 2 +- fs/nfs/pnfs.c | 56 ++++++----- fs/nfsd/nfs4state.c | 127 +++++++++++++++++++++--- fs/nfsd/state.h | 19 ++-- fs/ocfs2/namei.c | 2 + 25 files changed, 639 insertions(+), 266 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 938efe33b..94eea1f43 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3398,7 +3398,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, - u64 owner, u64 offset, int no_quota); + u64 owner, u64 offset); int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len, int delalloc); @@ -3411,7 +3411,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, - u64 root_objectid, u64 owner, u64 offset, int no_quota); + u64 root_objectid, u64 owner, u64 offset); int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index ac3e81da6..7832031fe 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -197,6 +197,119 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans, trans->delayed_ref_updates--; } +static bool merge_ref(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head, + struct btrfs_delayed_ref_node *ref, + u64 seq) +{ + struct btrfs_delayed_ref_node *next; + bool done = false; + + next = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node, + list); + while (!done && &next->list != &head->ref_list) { + int mod; + struct btrfs_delayed_ref_node *next2; + + next2 = list_next_entry(next, list); + + if (next == ref) + goto next; + + if (seq && next->seq >= seq) + goto next; + + if (next->type != ref->type) + goto next; + + if ((ref->type == BTRFS_TREE_BLOCK_REF_KEY || + ref->type == BTRFS_SHARED_BLOCK_REF_KEY) && + comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref), + btrfs_delayed_node_to_tree_ref(next), + ref->type)) + goto next; + if ((ref->type == BTRFS_EXTENT_DATA_REF_KEY || + ref->type == BTRFS_SHARED_DATA_REF_KEY) && + comp_data_refs(btrfs_delayed_node_to_data_ref(ref), + btrfs_delayed_node_to_data_ref(next))) + goto next; + + if (ref->action == next->action) { + mod = next->ref_mod; + } else { + if (ref->ref_mod < next->ref_mod) { + swap(ref, next); + done = true; + } + mod = -next->ref_mod; + } + + drop_delayed_ref(trans, delayed_refs, head, next); + ref->ref_mod += mod; + if (ref->ref_mod == 0) { + drop_delayed_ref(trans, delayed_refs, head, ref); + done = true; + } else { + /* + * Can't have multiples of the same ref on a tree block. + */ + WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY || + ref->type == BTRFS_SHARED_BLOCK_REF_KEY); + } +next: + next = next2; + } + + return done; +} + +void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head) +{ + struct btrfs_delayed_ref_node *ref; + u64 seq = 0; + + assert_spin_locked(&head->lock); + + if (list_empty(&head->ref_list)) + return; + + /* We don't have too many refs to merge for data. */ + if (head->is_data) + return; + + spin_lock(&fs_info->tree_mod_seq_lock); + if (!list_empty(&fs_info->tree_mod_seq_list)) { + struct seq_list *elem; + + elem = list_first_entry(&fs_info->tree_mod_seq_list, + struct seq_list, list); + seq = elem->seq; + } + spin_unlock(&fs_info->tree_mod_seq_lock); + + ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node, + list); + while (&ref->list != &head->ref_list) { + if (seq && ref->seq >= seq) + goto next; + + if (merge_ref(trans, delayed_refs, head, ref, seq)) { + if (list_empty(&head->ref_list)) + break; + ref = list_first_entry(&head->ref_list, + struct btrfs_delayed_ref_node, + list); + continue; + } +next: + ref = list_next_entry(ref, list); + } +} + int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs, u64 seq) @@ -292,8 +405,7 @@ add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans, exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node, list); /* No need to compare bytenr nor is_head */ - if (exist->type != ref->type || exist->no_quota != ref->no_quota || - exist->seq != ref->seq) + if (exist->type != ref->type || exist->seq != ref->seq) goto add_tail; if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY || @@ -524,7 +636,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *head_ref, struct btrfs_delayed_ref_node *ref, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, int level, - int action, int no_quota) + int action) { struct btrfs_delayed_tree_ref *full_ref; struct btrfs_delayed_ref_root *delayed_refs; @@ -546,7 +658,6 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, ref->action = action; ref->is_head = 0; ref->in_tree = 1; - ref->no_quota = no_quota; ref->seq = seq; full_ref = btrfs_delayed_node_to_tree_ref(ref); @@ -579,7 +690,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *head_ref, struct btrfs_delayed_ref_node *ref, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, u64 owner, - u64 offset, int action, int no_quota) + u64 offset, int action) { struct btrfs_delayed_data_ref *full_ref; struct btrfs_delayed_ref_root *delayed_refs; @@ -602,7 +713,6 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, ref->action = action; ref->is_head = 0; ref->in_tree = 1; - ref->no_quota = no_quota; ref->seq = seq; full_ref = btrfs_delayed_node_to_data_ref(ref); @@ -633,17 +743,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, int level, int action, - struct btrfs_delayed_extent_op *extent_op, - int no_quota) + struct btrfs_delayed_extent_op *extent_op) { struct btrfs_delayed_tree_ref *ref; struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_qgroup_extent_record *record = NULL; - if (!is_fstree(ref_root) || !fs_info->quota_enabled) - no_quota = 0; - BUG_ON(extent_op && extent_op->is_data); ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); if (!ref) @@ -672,8 +778,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, bytenr, num_bytes, action, 0); add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, - num_bytes, parent, ref_root, level, action, - no_quota); + num_bytes, parent, ref_root, level, action); spin_unlock(&delayed_refs->lock); return 0; @@ -694,17 +799,13 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, u64 owner, u64 offset, int action, - struct btrfs_delayed_extent_op *extent_op, - int no_quota) + struct btrfs_delayed_extent_op *extent_op) { struct btrfs_delayed_data_ref *ref; struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_qgroup_extent_record *record = NULL; - if (!is_fstree(ref_root) || !fs_info->quota_enabled) - no_quota = 0; - BUG_ON(extent_op && !extent_op->is_data); ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS); if (!ref) @@ -740,7 +841,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, num_bytes, parent, ref_root, owner, offset, - action, no_quota); + action); spin_unlock(&delayed_refs->lock); return 0; diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 13fb5e609..930887a42 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -68,7 +68,6 @@ struct btrfs_delayed_ref_node { unsigned int action:8; unsigned int type:8; - unsigned int no_quota:1; /* is this node still in the rbtree? */ unsigned int is_head:1; unsigned int in_tree:1; @@ -233,15 +232,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, int level, int action, - struct btrfs_delayed_extent_op *extent_op, - int no_quota); + struct btrfs_delayed_extent_op *extent_op); int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, u64 owner, u64 offset, int action, - struct btrfs_delayed_extent_op *extent_op, - int no_quota); + struct btrfs_delayed_extent_op *extent_op); int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 601d7d45d..cadacf643 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -95,8 +95,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 parent, u64 root_objectid, u64 flags, struct btrfs_disk_key *key, - int level, struct btrfs_key *ins, - int no_quota); + int level, struct btrfs_key *ins); static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 flags, int force); @@ -2009,8 +2008,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, - u64 root_objectid, u64 owner, u64 offset, - int no_quota) + u64 root_objectid, u64 owner, u64 offset) { int ret; struct btrfs_fs_info *fs_info = root->fs_info; @@ -2022,12 +2020,12 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, num_bytes, parent, root_objectid, (int)owner, - BTRFS_ADD_DELAYED_REF, NULL, no_quota); + BTRFS_ADD_DELAYED_REF, NULL); } else { ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, num_bytes, parent, root_objectid, owner, offset, - BTRFS_ADD_DELAYED_REF, NULL, no_quota); + BTRFS_ADD_DELAYED_REF, NULL); } return ret; } @@ -2048,15 +2046,11 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, u64 num_bytes = node->num_bytes; u64 refs; int ret; - int no_quota = node->no_quota; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - if (!is_fstree(root_objectid) || !root->fs_info->quota_enabled) - no_quota = 1; - path->reada = 1; path->leave_spinning = 1; /* this will setup the path even if it fails to insert the back ref */ @@ -2291,8 +2285,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, parent, ref_root, extent_op->flags_to_set, &extent_op->key, - ref->level, &ins, - node->no_quota); + ref->level, &ins); } else if (node->action == BTRFS_ADD_DELAYED_REF) { ret = __btrfs_inc_extent_ref(trans, root, node, parent, ref_root, @@ -2433,7 +2426,21 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, } } + /* + * We need to try and merge add/drops of the same ref since we + * can run into issues with relocate dropping the implicit ref + * and then it being added back again before the drop can + * finish. If we merged anything we need to re-loop so we can + * get a good ref. + * Or we can get node references of the same type that weren't + * merged when created due to bumps in the tree mod seq, and + * we need to merge them to prevent adding an inline extent + * backref before dropping it (triggering a BUG_ON at + * insert_inline_extent_backref()). + */ spin_lock(&locked_ref->lock); + btrfs_merge_delayed_refs(trans, fs_info, delayed_refs, + locked_ref); /* * locked_ref is the head node, so we have to go one @@ -3109,7 +3116,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, int level; int ret = 0; int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, - u64, u64, u64, u64, u64, u64, int); + u64, u64, u64, u64, u64, u64); if (btrfs_test_is_dummy_root(root)) @@ -3150,15 +3157,14 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, key.offset -= btrfs_file_extent_offset(buf, fi); ret = process_func(trans, root, bytenr, num_bytes, parent, ref_root, key.objectid, - key.offset, 1); + key.offset); if (ret) goto fail; } else { bytenr = btrfs_node_blockptr(buf, i); num_bytes = root->nodesize; ret = process_func(trans, root, bytenr, num_bytes, - parent, ref_root, level - 1, 0, - 1); + parent, ref_root, level - 1, 0); if (ret) goto fail; } @@ -6233,7 +6239,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, int extent_slot = 0; int found_extent = 0; int num_to_del = 1; - int no_quota = node->no_quota; u32 item_size; u64 refs; u64 bytenr = node->bytenr; @@ -6242,9 +6247,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, bool skinny_metadata = btrfs_fs_incompat(root->fs_info, SKINNY_METADATA); - if (!info->quota_enabled || !is_fstree(root_objectid)) - no_quota = 1; - path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -6570,7 +6572,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, buf->start, buf->len, parent, root->root_key.objectid, btrfs_header_level(buf), - BTRFS_DROP_DELAYED_REF, NULL, 0); + BTRFS_DROP_DELAYED_REF, NULL); BUG_ON(ret); /* -ENOMEM */ } @@ -6618,7 +6620,7 @@ out: /* Can return -ENOMEM */ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, - u64 owner, u64 offset, int no_quota) + u64 owner, u64 offset) { int ret; struct btrfs_fs_info *fs_info = root->fs_info; @@ -6641,13 +6643,13 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, num_bytes, parent, root_objectid, (int)owner, - BTRFS_DROP_DELAYED_REF, NULL, no_quota); + BTRFS_DROP_DELAYED_REF, NULL); } else { ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, num_bytes, parent, root_objectid, owner, offset, BTRFS_DROP_DELAYED_REF, - NULL, no_quota); + NULL); } return ret; } @@ -7429,8 +7431,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 parent, u64 root_objectid, u64 flags, struct btrfs_disk_key *key, - int level, struct btrfs_key *ins, - int no_quota) + int level, struct btrfs_key *ins) { int ret; struct btrfs_fs_info *fs_info = root->fs_info; @@ -7520,7 +7521,7 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid, ins->offset, 0, root_objectid, owner, offset, - BTRFS_ADD_DELAYED_EXTENT, NULL, 0); + BTRFS_ADD_DELAYED_EXTENT, NULL); return ret; } @@ -7734,7 +7735,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, ins.objectid, ins.offset, parent, root_objectid, level, BTRFS_ADD_DELAYED_EXTENT, - extent_op, 0); + extent_op); if (ret) goto out_free_delayed; } @@ -8282,7 +8283,7 @@ skip: } } ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, - root->root_key.objectid, level - 1, 0, 0); + root->root_key.objectid, level - 1, 0); BUG_ON(ret); /* -ENOMEM */ } btrfs_tree_unlock(next); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8c6f247ba..e27ea7ae7 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -756,8 +756,16 @@ next_slot: } btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (key.objectid > ino || - key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) + + if (key.objectid > ino) + break; + if (WARN_ON_ONCE(key.objectid < ino) || + key.type < BTRFS_EXTENT_DATA_KEY) { + ASSERT(del_nr == 0); + path->slots[0]++; + goto next_slot; + } + if (key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) break; fi = btrfs_item_ptr(leaf, path->slots[0], @@ -776,8 +784,8 @@ next_slot: btrfs_file_extent_inline_len(leaf, path->slots[0], fi); } else { - WARN_ON(1); - extent_end = search_start; + /* can't happen */ + BUG(); } /* @@ -847,7 +855,7 @@ next_slot: disk_bytenr, num_bytes, 0, root->root_key.objectid, new_key.objectid, - start - extent_offset, 1); + start - extent_offset); BUG_ON(ret); /* -ENOMEM */ } key.offset = start; @@ -925,7 +933,7 @@ delete_extent_item: disk_bytenr, num_bytes, 0, root->root_key.objectid, key.objectid, key.offset - - extent_offset, 0); + extent_offset); BUG_ON(ret); /* -ENOMEM */ inode_sub_bytes(inode, extent_end - key.offset); @@ -1204,7 +1212,7 @@ again: ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, - ino, orig_offset, 1); + ino, orig_offset); BUG_ON(ret); /* -ENOMEM */ if (split == start) { @@ -1231,7 +1239,7 @@ again: del_nr++; ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, - ino, orig_offset, 0); + ino, orig_offset); BUG_ON(ret); /* -ENOMEM */ } other_start = 0; @@ -1248,7 +1256,7 @@ again: del_nr++; ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, - ino, orig_offset, 0); + ino, orig_offset); BUG_ON(ret); /* -ENOMEM */ } if (del_nr == 0) { @@ -1868,8 +1876,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) struct btrfs_log_ctx ctx; int ret = 0; bool full_sync = 0; - const u64 len = end - start + 1; + u64 len; + /* + * The range length can be represented by u64, we have to do the typecasts + * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync() + */ + len = (u64)end - (u64)start + 1; trace_btrfs_sync_file(file, datasync); /* @@ -2057,8 +2070,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) } } if (!full_sync) { - ret = btrfs_wait_ordered_range(inode, start, - end - start + 1); + ret = btrfs_wait_ordered_range(inode, start, len); if (ret) { btrfs_end_transaction(trans, root); goto out; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 611b66d73..396e3d5c4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1294,8 +1294,14 @@ next_slot: num_bytes = 0; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid > ino || - found_key.type > BTRFS_EXTENT_DATA_KEY || + if (found_key.objectid > ino) + break; + if (WARN_ON_ONCE(found_key.objectid < ino) || + found_key.type < BTRFS_EXTENT_DATA_KEY) { + path->slots[0]++; + goto next_slot; + } + if (found_key.type > BTRFS_EXTENT_DATA_KEY || found_key.offset > end) break; @@ -2573,7 +2579,7 @@ again: ret = btrfs_inc_extent_ref(trans, root, new->bytenr, new->disk_len, 0, backref->root_id, backref->inum, - new->file_pos, 0); /* start - extent_offset */ + new->file_pos); /* start - extent_offset */ if (ret) { btrfs_abort_transaction(trans, root, ret); goto out_free_path; @@ -4217,6 +4223,47 @@ static int truncate_space_check(struct btrfs_trans_handle *trans, } +static int truncate_inline_extent(struct inode *inode, + struct btrfs_path *path, + struct btrfs_key *found_key, + const u64 item_end, + const u64 new_size) +{ + struct extent_buffer *leaf = path->nodes[0]; + int slot = path->slots[0]; + struct btrfs_file_extent_item *fi; + u32 size = (u32)(new_size - found_key->offset); + struct btrfs_root *root = BTRFS_I(inode)->root; + + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + + if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) { + loff_t offset = new_size; + loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE); + + /* + * Zero out the remaining of the last page of our inline extent, + * instead of directly truncating our inline extent here - that + * would be much more complex (decompressing all the data, then + * compressing the truncated data, which might be bigger than + * the size of the inline extent, resize the extent, etc). + * We release the path because to get the page we might need to + * read the extent item from disk (data not in the page cache). + */ + btrfs_release_path(path); + return btrfs_truncate_page(inode, offset, page_end - offset, 0); + } + + btrfs_set_file_extent_ram_bytes(leaf, fi, size); + size = btrfs_file_extent_calc_inline_size(size); + btrfs_truncate_item(root, path, size, 1); + + if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) + inode_sub_bytes(inode, item_end + 1 - new_size); + + return 0; +} + /* * this can truncate away extent items, csum items and directory items. * It starts at a high offset and removes keys until it can't find @@ -4411,27 +4458,40 @@ search_again: * special encodings */ if (!del_item && - btrfs_file_extent_compression(leaf, fi) == 0 && btrfs_file_extent_encryption(leaf, fi) == 0 && btrfs_file_extent_other_encoding(leaf, fi) == 0) { - u32 size = new_size - found_key.offset; - - if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) - inode_sub_bytes(inode, item_end + 1 - - new_size); /* - * update the ram bytes to properly reflect - * the new size of our item + * Need to release path in order to truncate a + * compressed extent. So delete any accumulated + * extent items so far. */ - btrfs_set_file_extent_ram_bytes(leaf, fi, size); - size = - btrfs_file_extent_calc_inline_size(size); - btrfs_truncate_item(root, path, size, 1); + if (btrfs_file_extent_compression(leaf, fi) != + BTRFS_COMPRESS_NONE && pending_del_nr) { + err = btrfs_del_items(trans, root, path, + pending_del_slot, + pending_del_nr); + if (err) { + btrfs_abort_transaction(trans, + root, + err); + goto error; + } + pending_del_nr = 0; + } + + err = truncate_inline_extent(inode, path, + &found_key, + item_end, + new_size); + if (err) { + btrfs_abort_transaction(trans, + root, err); + goto error; + } } else if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { - inode_sub_bytes(inode, item_end + 1 - - found_key.offset); + inode_sub_bytes(inode, item_end + 1 - new_size); } } delete: @@ -4461,7 +4521,7 @@ delete: ret = btrfs_free_extent(trans, root, extent_start, extent_num_bytes, 0, btrfs_header_owner(leaf), - ino, extent_offset, 0); + ino, extent_offset); BUG_ON(ret); if (btrfs_should_throttle_delayed_refs(trans, root)) btrfs_async_run_delayed_refs(root, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8d20f3b1c..6548a3682 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3203,41 +3203,6 @@ out: return ret; } -/* Helper to check and see if this root currently has a ref on the given disk - * bytenr. If it does then we need to update the quota for this root. This - * doesn't do anything if quotas aren't enabled. - */ -static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 disko) -{ - struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem); - struct ulist *roots; - struct ulist_iterator uiter; - struct ulist_node *root_node = NULL; - int ret; - - if (!root->fs_info->quota_enabled) - return 1; - - btrfs_get_tree_mod_seq(root->fs_info, &tree_mod_seq_elem); - ret = btrfs_find_all_roots(trans, root->fs_info, disko, - tree_mod_seq_elem.seq, &roots); - if (ret < 0) - goto out; - ret = 0; - ULIST_ITER_INIT(&uiter); - while ((root_node = ulist_next(roots, &uiter))) { - if (root_node->val == root->objectid) { - ret = 1; - break; - } - } - ulist_free(roots); -out: - btrfs_put_tree_mod_seq(root->fs_info, &tree_mod_seq_elem); - return ret; -} - static int clone_finish_inode_update(struct btrfs_trans_handle *trans, struct inode *inode, u64 endoff, @@ -3328,6 +3293,150 @@ static void clone_update_extent_map(struct inode *inode, &BTRFS_I(inode)->runtime_flags); } +/* + * Make sure we do not end up inserting an inline extent into a file that has + * already other (non-inline) extents. If a file has an inline extent it can + * not have any other extents and the (single) inline extent must start at the + * file offset 0. Failing to respect these rules will lead to file corruption, + * resulting in EIO errors on read/write operations, hitting BUG_ON's in mm, etc + * + * We can have extents that have been already written to disk or we can have + * dirty ranges still in delalloc, in which case the extent maps and items are + * created only when we run delalloc, and the delalloc ranges might fall outside + * the range we are currently locking in the inode's io tree. So we check the + * inode's i_size because of that (i_size updates are done while holding the + * i_mutex, which we are holding here). + * We also check to see if the inode has a size not greater than "datal" but has + * extents beyond it, due to an fallocate with FALLOC_FL_KEEP_SIZE (and we are + * protected against such concurrent fallocate calls by the i_mutex). + * + * If the file has no extents but a size greater than datal, do not allow the + * copy because we would need turn the inline extent into a non-inline one (even + * with NO_HOLES enabled). If we find our destination inode only has one inline + * extent, just overwrite it with the source inline extent if its size is less + * than the source extent's size, or we could copy the source inline extent's + * data into the destination inode's inline extent if the later is greater then + * the former. + */ +static int clone_copy_inline_extent(struct inode *src, + struct inode *dst, + struct btrfs_trans_handle *trans, + struct btrfs_path *path, + struct btrfs_key *new_key, + const u64 drop_start, + const u64 datal, + const u64 skip, + const u64 size, + char *inline_data) +{ + struct btrfs_root *root = BTRFS_I(dst)->root; + const u64 aligned_end = ALIGN(new_key->offset + datal, + root->sectorsize); + int ret; + struct btrfs_key key; + + if (new_key->offset > 0) + return -EOPNOTSUPP; + + key.objectid = btrfs_ino(dst); + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = 0; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + return ret; + } else if (ret > 0) { + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + return ret; + else if (ret > 0) + goto copy_inline_extent; + } + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (key.objectid == btrfs_ino(dst) && + key.type == BTRFS_EXTENT_DATA_KEY) { + ASSERT(key.offset > 0); + return -EOPNOTSUPP; + } + } else if (i_size_read(dst) <= datal) { + struct btrfs_file_extent_item *ei; + u64 ext_len; + + /* + * If the file size is <= datal, make sure there are no other + * extents following (can happen do to an fallocate call with + * the flag FALLOC_FL_KEEP_SIZE). + */ + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + /* + * If it's an inline extent, it can not have other extents + * following it. + */ + if (btrfs_file_extent_type(path->nodes[0], ei) == + BTRFS_FILE_EXTENT_INLINE) + goto copy_inline_extent; + + ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei); + if (ext_len > aligned_end) + return -EOPNOTSUPP; + + ret = btrfs_next_item(root, path); + if (ret < 0) { + return ret; + } else if (ret == 0) { + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == btrfs_ino(dst) && + key.type == BTRFS_EXTENT_DATA_KEY) + return -EOPNOTSUPP; + } + } + +copy_inline_extent: + /* + * We have no extent items, or we have an extent at offset 0 which may + * or may not be inlined. All these cases are dealt the same way. + */ + if (i_size_read(dst) > datal) { + /* + * If the destination inode has an inline extent... + * This would require copying the data from the source inline + * extent into the beginning of the destination's inline extent. + * But this is really complex, both extents can be compressed + * or just one of them, which would require decompressing and + * re-compressing data (which could increase the new compressed + * size, not allowing the compressed data to fit anymore in an + * inline extent). + * So just don't support this case for now (it should be rare, + * we are not really saving space when cloning inline extents). + */ + return -EOPNOTSUPP; + } + + btrfs_release_path(path); + ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1); + if (ret) + return ret; + ret = btrfs_insert_empty_item(trans, root, path, new_key, size); + if (ret) + return ret; + + if (skip) { + const u32 start = btrfs_file_extent_calc_inline_size(0); + + memmove(inline_data + start, inline_data + start + skip, datal); + } + + write_extent_buffer(path->nodes[0], inline_data, + btrfs_item_ptr_offset(path->nodes[0], + path->slots[0]), + size); + inode_add_bytes(dst, datal); + + return 0; +} + /** * btrfs_clone() - clone a range from inode file to another * @@ -3352,9 +3461,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode, u32 nritems; int slot; int ret; - int no_quota; const u64 len = olen_aligned; - u64 last_disko = 0; u64 last_dest_end = destoff; ret = -ENOMEM; @@ -3400,7 +3507,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode, nritems = btrfs_header_nritems(path->nodes[0]); process_slot: - no_quota = 1; if (path->slots[0] >= nritems) { ret = btrfs_next_leaf(BTRFS_I(src)->root, path); if (ret < 0) @@ -3552,35 +3658,13 @@ process_slot: btrfs_set_file_extent_num_bytes(leaf, extent, datal); - /* - * We need to look up the roots that point at - * this bytenr and see if the new root does. If - * it does not we need to make sure we update - * quotas appropriately. - */ - if (disko && root != BTRFS_I(src)->root && - disko != last_disko) { - no_quota = check_ref(trans, root, - disko); - if (no_quota < 0) { - btrfs_abort_transaction(trans, - root, - ret); - btrfs_end_transaction(trans, - root); - ret = no_quota; - goto out; - } - } - if (disko) { inode_add_bytes(inode, datal); ret = btrfs_inc_extent_ref(trans, root, disko, diskl, 0, root->root_key.objectid, btrfs_ino(inode), - new_key.offset - datao, - no_quota); + new_key.offset - datao); if (ret) { btrfs_abort_transaction(trans, root, @@ -3594,21 +3678,6 @@ process_slot: } else if (type == BTRFS_FILE_EXTENT_INLINE) { u64 skip = 0; u64 trim = 0; - u64 aligned_end = 0; - - /* - * Don't copy an inline extent into an offset - * greater than zero. Having an inline extent - * at such an offset results in chaos as btrfs - * isn't prepared for such cases. Just skip - * this case for the same reasons as commented - * at btrfs_ioctl_clone(). - */ - if (last_dest_end > 0) { - ret = -EOPNOTSUPP; - btrfs_end_transaction(trans, root); - goto out; - } if (off > key.offset) { skip = off - key.offset; @@ -3626,42 +3695,22 @@ process_slot: size -= skip + trim; datal -= skip + trim; - aligned_end = ALIGN(new_key.offset + datal, - root->sectorsize); - ret = btrfs_drop_extents(trans, root, inode, - drop_start, - aligned_end, - 1); + ret = clone_copy_inline_extent(src, inode, + trans, path, + &new_key, + drop_start, + datal, + skip, size, buf); if (ret) { if (ret != -EOPNOTSUPP) btrfs_abort_transaction(trans, - root, ret); - btrfs_end_transaction(trans, root); - goto out; - } - - ret = btrfs_insert_empty_item(trans, root, path, - &new_key, size); - if (ret) { - btrfs_abort_transaction(trans, root, - ret); + root, + ret); btrfs_end_transaction(trans, root); goto out; } - - if (skip) { - u32 start = - btrfs_file_extent_calc_inline_size(0); - memmove(buf+start, buf+start+skip, - datal); - } - leaf = path->nodes[0]; slot = path->slots[0]; - write_extent_buffer(leaf, buf, - btrfs_item_ptr_offset(leaf, slot), - size); - inode_add_bytes(inode, datal); } /* If we have an implicit hole (NO_HOLES feature). */ diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 303babeef..ab507e3d5 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1716,7 +1716,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans, ret = btrfs_inc_extent_ref(trans, root, new_bytenr, num_bytes, parent, btrfs_header_owner(leaf), - key.objectid, key.offset, 1); + key.objectid, key.offset); if (ret) { btrfs_abort_transaction(trans, root, ret); break; @@ -1724,7 +1724,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans, ret = btrfs_free_extent(trans, root, bytenr, num_bytes, parent, btrfs_header_owner(leaf), - key.objectid, key.offset, 1); + key.objectid, key.offset); if (ret) { btrfs_abort_transaction(trans, root, ret); break; @@ -1900,23 +1900,21 @@ again: ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize, path->nodes[level]->start, - src->root_key.objectid, level - 1, 0, - 1); + src->root_key.objectid, level - 1, 0); BUG_ON(ret); ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize, 0, dest->root_key.objectid, level - 1, - 0, 1); + 0); BUG_ON(ret); ret = btrfs_free_extent(trans, src, new_bytenr, blocksize, path->nodes[level]->start, - src->root_key.objectid, level - 1, 0, - 1); + src->root_key.objectid, level - 1, 0); BUG_ON(ret); ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize, 0, dest->root_key.objectid, level - 1, - 0, 1); + 0); BUG_ON(ret); btrfs_unlock_up_safe(path, 0); @@ -2745,7 +2743,7 @@ static int do_relocation(struct btrfs_trans_handle *trans, node->eb->start, blocksize, upper->eb->start, btrfs_header_owner(upper->eb), - node->level, 0, 1); + node->level, 0); BUG_ON(ret); ret = btrfs_drop_subtree(trans, root, eb, upper->eb); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index a739b825b..23bb2e4b9 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -2353,8 +2353,14 @@ static int send_subvol_begin(struct send_ctx *sctx) } TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen); - TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, - sctx->send_root->root_item.uuid); + + if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid)) + TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, + sctx->send_root->root_item.received_uuid); + else + TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, + sctx->send_root->root_item.uuid); + TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID, le64_to_cpu(sctx->send_root->root_item.ctransid)); if (parent_root) { diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1bbaace73..6f8af2de5 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -691,7 +691,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, ret = btrfs_inc_extent_ref(trans, root, ins.objectid, ins.offset, 0, root->root_key.objectid, - key->objectid, offset, 0); + key->objectid, offset); if (ret) goto out; } else { diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 6f518c90e..1fcd7b6e7 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -313,8 +313,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) /* check to make sure this item is what we want */ if (found_key.objectid != key.objectid) break; - if (found_key.type != BTRFS_XATTR_ITEM_KEY) + if (found_key.type > BTRFS_XATTR_ITEM_KEY) break; + if (found_key.type < BTRFS_XATTR_ITEM_KEY) + goto next; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); if (verify_dir_item(root, leaf, di)) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 51cb02da7..fe2c98276 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1935,7 +1935,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, len = sizeof(*head) + pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + - sizeof(struct timespec); + sizeof(struct ceph_timespec); /* calculate (max) length for cap releases */ len += sizeof(struct ceph_mds_request_release) * diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index c711be8d6..9c8d23316 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -271,8 +271,12 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) dput(dentry); dentry = ERR_PTR(-EEXIST); } - if (IS_ERR(dentry)) + + if (IS_ERR(dentry)) { mutex_unlock(&d_inode(parent)->i_mutex); + simple_release_fs(&debugfs_mount, &debugfs_mount_count); + } + return dentry; } diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index 457315581..2fab243a4 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -411,7 +411,13 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) ext4_lblk_t lblk = ex->ee_block; ext4_fsblk_t pblk = ext4_ext_pblock(ex); unsigned int len = ext4_ext_get_actual_len(ex); - int err = 0; + int ret, err = 0; + +#if 0 + ext4_msg(inode->i_sb, KERN_CRIT, + "ext4_encrypted_zeroout ino %lu lblk %u len %u", + (unsigned long) inode->i_ino, lblk, len); +#endif BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE); @@ -437,17 +443,26 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) goto errout; } bio->bi_bdev = inode->i_sb->s_bdev; - bio->bi_iter.bi_sector = pblk; - err = bio_add_page(bio, ciphertext_page, + bio->bi_iter.bi_sector = + pblk << (inode->i_sb->s_blocksize_bits - 9); + ret = bio_add_page(bio, ciphertext_page, inode->i_sb->s_blocksize, 0); - if (err) { + if (ret != inode->i_sb->s_blocksize) { + /* should never happen! */ + ext4_msg(inode->i_sb, KERN_ERR, + "bio_add_page failed: %d", ret); + WARN_ON(1); bio_put(bio); + err = -EIO; goto errout; } err = submit_bio_wait(WRITE, bio); + if ((err == 0) && bio->bi_error) + err = -EIO; bio_put(bio); if (err) goto errout; + lblk++; pblk++; } err = 0; errout: diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index d41843181..e770c1ee4 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -88,13 +88,13 @@ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) return 0; } + err = handle->h_err; if (!handle->h_transaction) { - err = jbd2_journal_stop(handle); - return handle->h_err ? handle->h_err : err; + rc = jbd2_journal_stop(handle); + return err ? err : rc; } sb = handle->h_transaction->t_journal->j_private; - err = handle->h_err; rc = jbd2_journal_stop(handle); if (!err) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 2553aa8b6..7f486e350 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3558,6 +3558,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, max_zeroout = sbi->s_extent_max_zeroout_kb >> (inode->i_sb->s_blocksize_bits - 10); + if (ext4_encrypted_inode(inode)) + max_zeroout = 0; + /* If extent is less than s_max_zeroout_kb, zeroout directly */ if (max_zeroout && (ee_len <= max_zeroout)) { err = ext4_ext_zeroout(inode, ex); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 84ba4d2b3..17fbe3882 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -425,6 +425,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, struct buffer_head *bh, *head; int ret = 0; int nr_submitted = 0; + int nr_to_submit = 0; blocksize = 1 << inode->i_blkbits; @@ -477,11 +478,13 @@ int ext4_bio_write_page(struct ext4_io_submit *io, unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); } set_buffer_async_write(bh); + nr_to_submit++; } while ((bh = bh->b_this_page) != head); bh = head = page_buffers(page); - if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { + if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode) && + nr_to_submit) { data_page = ext4_encrypt(inode, page); if (IS_ERR(data_page)) { ret = PTR_ERR(data_page); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a63c7b0a1..df84bd256 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -394,9 +394,13 @@ static void ext4_handle_error(struct super_block *sb) smp_wmb(); sb->s_flags |= MS_RDONLY; } - if (test_opt(sb, ERRORS_PANIC)) + if (test_opt(sb, ERRORS_PANIC)) { + if (EXT4_SB(sb)->s_journal && + !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) + return; panic("EXT4-fs (device %s): panic forced after error\n", sb->s_id); + } } #define ext4_error_ratelimit(sb) \ @@ -585,8 +589,12 @@ void __ext4_abort(struct super_block *sb, const char *function, jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); save_error_info(sb, function, line); } - if (test_opt(sb, ERRORS_PANIC)) + if (test_opt(sb, ERRORS_PANIC)) { + if (EXT4_SB(sb)->s_journal && + !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) + return; panic("EXT4-fs panic from previous error\n"); + } } void __ext4_msg(struct super_block *sb, diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 8270fe9e3..37023d0bd 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2071,8 +2071,12 @@ static void __journal_abort_soft (journal_t *journal, int errno) __jbd2_journal_abort_hard(journal); - if (errno) + if (errno) { jbd2_journal_update_sb_errno(journal); + write_lock(&journal->j_state_lock); + journal->j_flags |= JBD2_REC_ERR; + write_unlock(&journal->j_state_lock); + } } /** diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 326d9e10d..ffdf9b9e8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1824,7 +1824,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0) nfsi->attr_gencount = fattr->gencount; } - invalid &= ~NFS_INO_INVALID_ATTR; + + /* Don't declare attrcache up to date if there were no attrs! */ + if (fattr->valid != 0) + invalid &= ~NFS_INO_INVALID_ATTR; + /* Don't invalidate the data if we were to blame */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 223bedda6..10410e8b5 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -33,7 +33,7 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) return ret; idr_preload(GFP_KERNEL); spin_lock(&nn->nfs_client_lock); - ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT); + ret = idr_alloc(&nn->cb_ident_idr, clp, 1, 0, GFP_NOWAIT); if (ret >= 0) clp->cl_cb_ident = ret; spin_unlock(&nn->nfs_client_lock); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 8abe27165..abf5caea2 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -872,33 +872,38 @@ send_layoutget(struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); - lgp = kzalloc(sizeof(*lgp), gfp_flags); - if (lgp == NULL) - return NULL; + /* + * Synchronously retrieve layout information from server and + * store in lseg. If we race with a concurrent seqid morphing + * op, then re-send the LAYOUTGET. + */ + do { + lgp = kzalloc(sizeof(*lgp), gfp_flags); + if (lgp == NULL) + return NULL; + + i_size = i_size_read(ino); + + lgp->args.minlength = PAGE_CACHE_SIZE; + if (lgp->args.minlength > range->length) + lgp->args.minlength = range->length; + if (range->iomode == IOMODE_READ) { + if (range->offset >= i_size) + lgp->args.minlength = 0; + else if (i_size - range->offset < lgp->args.minlength) + lgp->args.minlength = i_size - range->offset; + } + lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; + lgp->args.range = *range; + lgp->args.type = server->pnfs_curr_ld->id; + lgp->args.inode = ino; + lgp->args.ctx = get_nfs_open_context(ctx); + lgp->gfp_flags = gfp_flags; + lgp->cred = lo->plh_lc_cred; - i_size = i_size_read(ino); + lseg = nfs4_proc_layoutget(lgp, gfp_flags); + } while (lseg == ERR_PTR(-EAGAIN)); - lgp->args.minlength = PAGE_CACHE_SIZE; - if (lgp->args.minlength > range->length) - lgp->args.minlength = range->length; - if (range->iomode == IOMODE_READ) { - if (range->offset >= i_size) - lgp->args.minlength = 0; - else if (i_size - range->offset < lgp->args.minlength) - lgp->args.minlength = i_size - range->offset; - } - lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; - lgp->args.range = *range; - lgp->args.type = server->pnfs_curr_ld->id; - lgp->args.inode = ino; - lgp->args.ctx = get_nfs_open_context(ctx); - lgp->gfp_flags = gfp_flags; - lgp->cred = lo->plh_lc_cred; - - /* Synchronously retrieve layout information from server and - * store in lseg. - */ - lseg = nfs4_proc_layoutget(lgp, gfp_flags); if (IS_ERR(lseg)) { switch (PTR_ERR(lseg)) { case -ENOMEM: @@ -1687,6 +1692,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) /* existing state ID, make sure the sequence number matches. */ if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { dprintk("%s forget reply due to sequence\n", __func__); + status = -EAGAIN; goto out_forget_reply; } pnfs_set_layout_stateid(lo, &res->stateid, false); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0f1d5691b..0dea0c254 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -765,16 +765,68 @@ void nfs4_unhash_stid(struct nfs4_stid *s) s->sc_type = 0; } -static void +/** + * nfs4_get_existing_delegation - Discover if this delegation already exists + * @clp: a pointer to the nfs4_client we're granting a delegation to + * @fp: a pointer to the nfs4_file we're granting a delegation on + * + * Return: + * On success: NULL if an existing delegation was not found. + * + * On error: -EAGAIN if one was previously granted to this nfs4_client + * for this nfs4_file. + * + */ + +static int +nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp) +{ + struct nfs4_delegation *searchdp = NULL; + struct nfs4_client *searchclp = NULL; + + lockdep_assert_held(&state_lock); + lockdep_assert_held(&fp->fi_lock); + + list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) { + searchclp = searchdp->dl_stid.sc_client; + if (clp == searchclp) { + return -EAGAIN; + } + } + return 0; +} + +/** + * hash_delegation_locked - Add a delegation to the appropriate lists + * @dp: a pointer to the nfs4_delegation we are adding. + * @fp: a pointer to the nfs4_file we're granting a delegation on + * + * Return: + * On success: NULL if the delegation was successfully hashed. + * + * On error: -EAGAIN if one was previously granted to this + * nfs4_client for this nfs4_file. Delegation is not hashed. + * + */ + +static int hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) { + int status; + struct nfs4_client *clp = dp->dl_stid.sc_client; + lockdep_assert_held(&state_lock); lockdep_assert_held(&fp->fi_lock); + status = nfs4_get_existing_delegation(clp, fp); + if (status) + return status; + ++fp->fi_delegees; atomic_inc(&dp->dl_stid.sc_count); dp->dl_stid.sc_type = NFS4_DELEG_STID; list_add(&dp->dl_perfile, &fp->fi_delegations); - list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); + list_add(&dp->dl_perclnt, &clp->cl_delegations); + return 0; } static bool @@ -3360,6 +3412,7 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_access_bmap = 0; stp->st_deny_bmap = 0; stp->st_openstp = NULL; + init_rwsem(&stp->st_rwsem); spin_lock(&oo->oo_owner.so_client->cl_lock); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); spin_lock(&fp->fi_lock); @@ -3945,6 +3998,18 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) return fl; } +/** + * nfs4_setlease - Obtain a delegation by requesting lease from vfs layer + * @dp: a pointer to the nfs4_delegation we're adding. + * + * Return: + * On success: Return code will be 0 on success. + * + * On error: -EAGAIN if there was an existing delegation. + * nonzero if there is an error in other cases. + * + */ + static int nfs4_setlease(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_stid.sc_file; @@ -3976,16 +4041,19 @@ static int nfs4_setlease(struct nfs4_delegation *dp) goto out_unlock; /* Race breaker */ if (fp->fi_deleg_file) { - status = 0; - ++fp->fi_delegees; - hash_delegation_locked(dp, fp); + status = hash_delegation_locked(dp, fp); goto out_unlock; } fp->fi_deleg_file = filp; - fp->fi_delegees = 1; - hash_delegation_locked(dp, fp); + fp->fi_delegees = 0; + status = hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); + if (status) { + /* Should never happen, this is a new fi_deleg_file */ + WARN_ON_ONCE(1); + goto out_fput; + } return 0; out_unlock: spin_unlock(&fp->fi_lock); @@ -4005,6 +4073,15 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, if (fp->fi_had_conflict) return ERR_PTR(-EAGAIN); + spin_lock(&state_lock); + spin_lock(&fp->fi_lock); + status = nfs4_get_existing_delegation(clp, fp); + spin_unlock(&fp->fi_lock); + spin_unlock(&state_lock); + + if (status) + return ERR_PTR(status); + dp = alloc_init_deleg(clp, fh, odstate); if (!dp) return ERR_PTR(-ENOMEM); @@ -4023,9 +4100,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, status = -EAGAIN; goto out_unlock; } - ++fp->fi_delegees; - hash_delegation_locked(dp, fp); - status = 0; + status = hash_delegation_locked(dp, fp); out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); @@ -4187,15 +4262,20 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf */ if (stp) { /* Stateid was found, this is an OPEN upgrade */ + down_read(&stp->st_rwsem); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); - if (status) + if (status) { + up_read(&stp->st_rwsem); goto out; + } } else { stp = open->op_stp; open->op_stp = NULL; init_open_stateid(stp, fp, open); + down_read(&stp->st_rwsem); status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { + up_read(&stp->st_rwsem); release_open_stateid(stp); goto out; } @@ -4207,6 +4287,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf } update_stateid(&stp->st_stid.sc_stateid); memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + up_read(&stp->st_rwsem); if (nfsd4_has_session(&resp->cstate)) { if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { @@ -4819,10 +4900,13 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ * revoked delegations are kept only for free_stateid. */ return nfserr_bad_stateid; + down_write(&stp->st_rwsem); status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); - if (status) - return status; - return nfs4_check_fh(current_fh, &stp->st_stid); + if (status == nfs_ok) + status = nfs4_check_fh(current_fh, &stp->st_stid); + if (status != nfs_ok) + up_write(&stp->st_rwsem); + return status; } /* @@ -4869,6 +4953,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs return status; oo = openowner(stp->st_stateowner); if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); return nfserr_bad_stateid; } @@ -4899,11 +4984,14 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; oo = openowner(stp->st_stateowner); status = nfserr_bad_stateid; - if (oo->oo_flags & NFS4_OO_CONFIRMED) + if (oo->oo_flags & NFS4_OO_CONFIRMED) { + up_write(&stp->st_rwsem); goto put_stateid; + } oo->oo_flags |= NFS4_OO_CONFIRMED; update_stateid(&stp->st_stid.sc_stateid); memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + up_write(&stp->st_rwsem); dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); @@ -4982,6 +5070,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); status = nfs_ok; put_stateid: + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); @@ -5035,6 +5124,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; update_stateid(&stp->st_stid.sc_stateid); memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + up_write(&stp->st_rwsem); nfsd4_close_open_stateid(stp); @@ -5260,6 +5350,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; + init_rwsem(&stp->st_rwsem); list_add(&stp->st_locks, &open_stp->st_locks); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); spin_lock(&fp->fi_lock); @@ -5428,6 +5519,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &open_stp, nn); if (status) goto out; + up_write(&open_stp->st_rwsem); open_sop = openowner(open_stp->st_stateowner); status = nfserr_bad_stateid; if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, @@ -5435,6 +5527,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; status = lookup_or_create_lock_state(cstate, open_stp, lock, &lock_stp, &new); + if (status == nfs_ok) + down_write(&lock_stp->st_rwsem); } else { status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, @@ -5540,6 +5634,8 @@ out: seqid_mutating_err(ntohl(status))) lock_sop->lo_owner.so_seqid++; + up_write(&lock_stp->st_rwsem); + /* * If this is a new, never-before-used stateid, and we are * returning an error, then just go ahead and release it. @@ -5709,6 +5805,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fput: fput(filp); put_stateid: + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 583ffc13c..31bde12fe 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -534,15 +534,16 @@ struct nfs4_file { * Better suggestions welcome. */ struct nfs4_ol_stateid { - struct nfs4_stid st_stid; /* must be first field */ - struct list_head st_perfile; - struct list_head st_perstateowner; - struct list_head st_locks; - struct nfs4_stateowner * st_stateowner; - struct nfs4_clnt_odstate * st_clnt_odstate; - unsigned char st_access_bmap; - unsigned char st_deny_bmap; - struct nfs4_ol_stateid * st_openstp; + struct nfs4_stid st_stid; + struct list_head st_perfile; + struct list_head st_perstateowner; + struct list_head st_locks; + struct nfs4_stateowner *st_stateowner; + struct nfs4_clnt_odstate *st_clnt_odstate; + unsigned char st_access_bmap; + unsigned char st_deny_bmap; + struct nfs4_ol_stateid *st_openstp; + struct rw_semaphore st_rwsem; }; static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index b7dfac226..12bfa9ca5 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -374,6 +374,8 @@ static int ocfs2_mknod(struct inode *dir, mlog_errno(status); goto leave; } + /* update inode->i_mode after mask with "umask". */ + inode->i_mode = mode; handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb, S_ISDIR(mode), -- cgit v1.2.3