author     André Fabian Silva Delgado <emulatorman@parabola.nu>   2015-12-16 14:55:49 -0300
committer  André Fabian Silva Delgado <emulatorman@parabola.nu>   2015-12-16 14:55:49 -0300
commit     f3a16ba6a1152b8966dcadc668af4cf00623c7b1 (patch)
tree       5fee49a027f6fddf70b29369d24703946370eb77 /fs
parent     b652965369918b9d992dc42fb060240f94d98769 (diff)
Linux-libre 4.3.3-gnu
Diffstat (limited to 'fs')
-rw-r--r--  fs/btrfs/ctree.h       |   4
-rw-r--r--  fs/btrfs/delayed-ref.c | 139
-rw-r--r--  fs/btrfs/delayed-ref.h |   7
-rw-r--r--  fs/btrfs/extent-tree.c |  59
-rw-r--r--  fs/btrfs/file.c        |  36
-rw-r--r--  fs/btrfs/inode.c       |  96
-rw-r--r--  fs/btrfs/ioctl.c       | 257
-rw-r--r--  fs/btrfs/relocation.c  |  16
-rw-r--r--  fs/btrfs/send.c        |  10
-rw-r--r--  fs/btrfs/tree-log.c    |   2
-rw-r--r--  fs/btrfs/xattr.c       |   4
-rw-r--r--  fs/ceph/mds_client.c   |   2
-rw-r--r--  fs/debugfs/inode.c     |   6
-rw-r--r--  fs/ext4/crypto.c       |  23
-rw-r--r--  fs/ext4/ext4_jbd2.c    |   6
-rw-r--r--  fs/ext4/extents.c      |   3
-rw-r--r--  fs/ext4/page-io.c      |   5
-rw-r--r--  fs/ext4/super.c        |  12
-rw-r--r--  fs/jbd2/journal.c      |   6
-rw-r--r--  fs/nfs/inode.c         |   6
-rw-r--r--  fs/nfs/nfs4client.c    |   2
-rw-r--r--  fs/nfs/pnfs.c          |  56
-rw-r--r--  fs/nfsd/nfs4state.c    | 127
-rw-r--r--  fs/nfsd/state.h        |  19
-rw-r--r--  fs/ocfs2/namei.c       |   2
25 files changed, 639 insertions(+), 266 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 938efe33b..94eea1f43 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3398,7 +3398,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
int btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
- u64 owner, u64 offset, int no_quota);
+ u64 owner, u64 offset);
int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len,
int delalloc);
@@ -3411,7 +3411,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 owner, u64 offset, int no_quota);
+ u64 root_objectid, u64 owner, u64 offset);
int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index ac3e81da6..7832031fe 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -197,6 +197,119 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
trans->delayed_ref_updates--;
}
+static bool merge_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head,
+ struct btrfs_delayed_ref_node *ref,
+ u64 seq)
+{
+ struct btrfs_delayed_ref_node *next;
+ bool done = false;
+
+ next = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
+ list);
+ while (!done && &next->list != &head->ref_list) {
+ int mod;
+ struct btrfs_delayed_ref_node *next2;
+
+ next2 = list_next_entry(next, list);
+
+ if (next == ref)
+ goto next;
+
+ if (seq && next->seq >= seq)
+ goto next;
+
+ if (next->type != ref->type)
+ goto next;
+
+ if ((ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
+ ref->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
+ comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref),
+ btrfs_delayed_node_to_tree_ref(next),
+ ref->type))
+ goto next;
+ if ((ref->type == BTRFS_EXTENT_DATA_REF_KEY ||
+ ref->type == BTRFS_SHARED_DATA_REF_KEY) &&
+ comp_data_refs(btrfs_delayed_node_to_data_ref(ref),
+ btrfs_delayed_node_to_data_ref(next)))
+ goto next;
+
+ if (ref->action == next->action) {
+ mod = next->ref_mod;
+ } else {
+ if (ref->ref_mod < next->ref_mod) {
+ swap(ref, next);
+ done = true;
+ }
+ mod = -next->ref_mod;
+ }
+
+ drop_delayed_ref(trans, delayed_refs, head, next);
+ ref->ref_mod += mod;
+ if (ref->ref_mod == 0) {
+ drop_delayed_ref(trans, delayed_refs, head, ref);
+ done = true;
+ } else {
+ /*
+ * Can't have multiples of the same ref on a tree block.
+ */
+ WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
+ ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
+ }
+next:
+ next = next2;
+ }
+
+ return done;
+}
+
+void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head)
+{
+ struct btrfs_delayed_ref_node *ref;
+ u64 seq = 0;
+
+ assert_spin_locked(&head->lock);
+
+ if (list_empty(&head->ref_list))
+ return;
+
+ /* We don't have too many refs to merge for data. */
+ if (head->is_data)
+ return;
+
+ spin_lock(&fs_info->tree_mod_seq_lock);
+ if (!list_empty(&fs_info->tree_mod_seq_list)) {
+ struct seq_list *elem;
+
+ elem = list_first_entry(&fs_info->tree_mod_seq_list,
+ struct seq_list, list);
+ seq = elem->seq;
+ }
+ spin_unlock(&fs_info->tree_mod_seq_lock);
+
+ ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
+ list);
+ while (&ref->list != &head->ref_list) {
+ if (seq && ref->seq >= seq)
+ goto next;
+
+ if (merge_ref(trans, delayed_refs, head, ref, seq)) {
+ if (list_empty(&head->ref_list))
+ break;
+ ref = list_first_entry(&head->ref_list,
+ struct btrfs_delayed_ref_node,
+ list);
+ continue;
+ }
+next:
+ ref = list_next_entry(ref, list);
+ }
+}
+
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
u64 seq)
@@ -292,8 +405,7 @@ add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node,
list);
/* No need to compare bytenr nor is_head */
- if (exist->type != ref->type || exist->no_quota != ref->no_quota ||
- exist->seq != ref->seq)
+ if (exist->type != ref->type || exist->seq != ref->seq)
goto add_tail;
if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY ||
@@ -524,7 +636,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head_ref,
struct btrfs_delayed_ref_node *ref, u64 bytenr,
u64 num_bytes, u64 parent, u64 ref_root, int level,
- int action, int no_quota)
+ int action)
{
struct btrfs_delayed_tree_ref *full_ref;
struct btrfs_delayed_ref_root *delayed_refs;
@@ -546,7 +658,6 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
ref->action = action;
ref->is_head = 0;
ref->in_tree = 1;
- ref->no_quota = no_quota;
ref->seq = seq;
full_ref = btrfs_delayed_node_to_tree_ref(ref);
@@ -579,7 +690,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head_ref,
struct btrfs_delayed_ref_node *ref, u64 bytenr,
u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
- u64 offset, int action, int no_quota)
+ u64 offset, int action)
{
struct btrfs_delayed_data_ref *full_ref;
struct btrfs_delayed_ref_root *delayed_refs;
@@ -602,7 +713,6 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
ref->action = action;
ref->is_head = 0;
ref->in_tree = 1;
- ref->no_quota = no_quota;
ref->seq = seq;
full_ref = btrfs_delayed_node_to_data_ref(ref);
@@ -633,17 +743,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
- struct btrfs_delayed_extent_op *extent_op,
- int no_quota)
+ struct btrfs_delayed_extent_op *extent_op)
{
struct btrfs_delayed_tree_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
- if (!is_fstree(ref_root) || !fs_info->quota_enabled)
- no_quota = 0;
-
BUG_ON(extent_op && extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
if (!ref)
@@ -672,8 +778,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
bytenr, num_bytes, action, 0);
add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
- num_bytes, parent, ref_root, level, action,
- no_quota);
+ num_bytes, parent, ref_root, level, action);
spin_unlock(&delayed_refs->lock);
return 0;
@@ -694,17 +799,13 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, int action,
- struct btrfs_delayed_extent_op *extent_op,
- int no_quota)
+ struct btrfs_delayed_extent_op *extent_op)
{
struct btrfs_delayed_data_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
- if (!is_fstree(ref_root) || !fs_info->quota_enabled)
- no_quota = 0;
-
BUG_ON(extent_op && !extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
if (!ref)
@@ -740,7 +841,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
- action, no_quota);
+ action);
spin_unlock(&delayed_refs->lock);
return 0;
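
The merge loop added above collapses opposing ADD/DROP entries for the same extent
before they are run. A minimal userspace sketch of the ref_mod arithmetic follows;
the names are hypothetical, and the kernel operates on struct btrfs_delayed_ref_node
lists and additionally swaps the two nodes when the survivor would otherwise carry
the smaller ref_mod, so the dominant action wins.

#include <stdio.h>

enum ref_action { REF_ADD, REF_DROP };

struct ref {
	enum ref_action action;
	int ref_mod;
};

/*
 * Fold b into a the way merge_ref() folds "next" into "ref": matching
 * actions accumulate, opposing actions cancel.  Returns 1 when the two
 * refs cancelled out completely (the kernel then drops both nodes).
 */
static int merge_pair(struct ref *a, const struct ref *b)
{
	int mod = (a->action == b->action) ? b->ref_mod : -b->ref_mod;

	a->ref_mod += mod;
	return a->ref_mod == 0;
}

int main(void)
{
	struct ref add  = { REF_ADD,  2 };
	struct ref drop = { REF_DROP, 2 };

	if (merge_pair(&add, &drop))
		printf("add/drop cancelled, nothing left to run\n");
	else
		printf("net ref_mod = %d\n", add.ref_mod);
	return 0;
}
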
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 13fb5e609..930887a42 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -68,7 +68,6 @@ struct btrfs_delayed_ref_node {
unsigned int action:8;
unsigned int type:8;
- unsigned int no_quota:1;
/* is this node still in the rbtree? */
unsigned int is_head:1;
unsigned int in_tree:1;
@@ -233,15 +232,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
- struct btrfs_delayed_extent_op *extent_op,
- int no_quota);
+ struct btrfs_delayed_extent_op *extent_op);
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, int action,
- struct btrfs_delayed_extent_op *extent_op,
- int no_quota);
+ struct btrfs_delayed_extent_op *extent_op);
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 601d7d45d..cadacf643 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -95,8 +95,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 parent, u64 root_objectid,
u64 flags, struct btrfs_disk_key *key,
- int level, struct btrfs_key *ins,
- int no_quota);
+ int level, struct btrfs_key *ins);
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 flags,
int force);
@@ -2009,8 +2008,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 owner, u64 offset,
- int no_quota)
+ u64 root_objectid, u64 owner, u64 offset)
{
int ret;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -2022,12 +2020,12 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, (int)owner,
- BTRFS_ADD_DELAYED_REF, NULL, no_quota);
+ BTRFS_ADD_DELAYED_REF, NULL);
} else {
ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, owner, offset,
- BTRFS_ADD_DELAYED_REF, NULL, no_quota);
+ BTRFS_ADD_DELAYED_REF, NULL);
}
return ret;
}
@@ -2048,15 +2046,11 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
u64 num_bytes = node->num_bytes;
u64 refs;
int ret;
- int no_quota = node->no_quota;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- if (!is_fstree(root_objectid) || !root->fs_info->quota_enabled)
- no_quota = 1;
-
path->reada = 1;
path->leave_spinning = 1;
/* this will setup the path even if it fails to insert the back ref */
@@ -2291,8 +2285,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
parent, ref_root,
extent_op->flags_to_set,
&extent_op->key,
- ref->level, &ins,
- node->no_quota);
+ ref->level, &ins);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
ret = __btrfs_inc_extent_ref(trans, root, node,
parent, ref_root,
@@ -2433,7 +2426,21 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
}
}
+ /*
+ * We need to try and merge add/drops of the same ref since we
+ * can run into issues with relocate dropping the implicit ref
+ * and then it being added back again before the drop can
+ * finish. If we merged anything we need to re-loop so we can
+ * get a good ref.
+ * Or we can get node references of the same type that weren't
+ * merged when created due to bumps in the tree mod seq, and
+ * we need to merge them to prevent adding an inline extent
+ * backref before dropping it (triggering a BUG_ON at
+ * insert_inline_extent_backref()).
+ */
spin_lock(&locked_ref->lock);
+ btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
+ locked_ref);
/*
* locked_ref is the head node, so we have to go one
@@ -3109,7 +3116,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
int level;
int ret = 0;
int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
- u64, u64, u64, u64, u64, u64, int);
+ u64, u64, u64, u64, u64, u64);
if (btrfs_test_is_dummy_root(root))
@@ -3150,15 +3157,14 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
key.offset -= btrfs_file_extent_offset(buf, fi);
ret = process_func(trans, root, bytenr, num_bytes,
parent, ref_root, key.objectid,
- key.offset, 1);
+ key.offset);
if (ret)
goto fail;
} else {
bytenr = btrfs_node_blockptr(buf, i);
num_bytes = root->nodesize;
ret = process_func(trans, root, bytenr, num_bytes,
- parent, ref_root, level - 1, 0,
- 1);
+ parent, ref_root, level - 1, 0);
if (ret)
goto fail;
}
@@ -6233,7 +6239,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
int extent_slot = 0;
int found_extent = 0;
int num_to_del = 1;
- int no_quota = node->no_quota;
u32 item_size;
u64 refs;
u64 bytenr = node->bytenr;
@@ -6242,9 +6247,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
SKINNY_METADATA);
- if (!info->quota_enabled || !is_fstree(root_objectid))
- no_quota = 1;
-
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -6570,7 +6572,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
buf->start, buf->len,
parent, root->root_key.objectid,
btrfs_header_level(buf),
- BTRFS_DROP_DELAYED_REF, NULL, 0);
+ BTRFS_DROP_DELAYED_REF, NULL);
BUG_ON(ret); /* -ENOMEM */
}
@@ -6618,7 +6620,7 @@ out:
/* Can return -ENOMEM */
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
- u64 owner, u64 offset, int no_quota)
+ u64 owner, u64 offset)
{
int ret;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -6641,13 +6643,13 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, (int)owner,
- BTRFS_DROP_DELAYED_REF, NULL, no_quota);
+ BTRFS_DROP_DELAYED_REF, NULL);
} else {
ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, owner,
offset, BTRFS_DROP_DELAYED_REF,
- NULL, no_quota);
+ NULL);
}
return ret;
}
@@ -7429,8 +7431,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 parent, u64 root_objectid,
u64 flags, struct btrfs_disk_key *key,
- int level, struct btrfs_key *ins,
- int no_quota)
+ int level, struct btrfs_key *ins)
{
int ret;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -7520,7 +7521,7 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
ins->offset, 0,
root_objectid, owner, offset,
- BTRFS_ADD_DELAYED_EXTENT, NULL, 0);
+ BTRFS_ADD_DELAYED_EXTENT, NULL);
return ret;
}
@@ -7734,7 +7735,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
ins.objectid, ins.offset,
parent, root_objectid, level,
BTRFS_ADD_DELAYED_EXTENT,
- extent_op, 0);
+ extent_op);
if (ret)
goto out_free_delayed;
}
@@ -8282,7 +8283,7 @@ skip:
}
}
ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
- root->root_key.objectid, level - 1, 0, 0);
+ root->root_key.objectid, level - 1, 0);
BUG_ON(ret); /* -ENOMEM */
}
btrfs_tree_unlock(next);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 8c6f247ba..e27ea7ae7 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -756,8 +756,16 @@ next_slot:
}
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- if (key.objectid > ino ||
- key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
+
+ if (key.objectid > ino)
+ break;
+ if (WARN_ON_ONCE(key.objectid < ino) ||
+ key.type < BTRFS_EXTENT_DATA_KEY) {
+ ASSERT(del_nr == 0);
+ path->slots[0]++;
+ goto next_slot;
+ }
+ if (key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
break;
fi = btrfs_item_ptr(leaf, path->slots[0],
@@ -776,8 +784,8 @@ next_slot:
btrfs_file_extent_inline_len(leaf,
path->slots[0], fi);
} else {
- WARN_ON(1);
- extent_end = search_start;
+ /* can't happen */
+ BUG();
}
/*
@@ -847,7 +855,7 @@ next_slot:
disk_bytenr, num_bytes, 0,
root->root_key.objectid,
new_key.objectid,
- start - extent_offset, 1);
+ start - extent_offset);
BUG_ON(ret); /* -ENOMEM */
}
key.offset = start;
@@ -925,7 +933,7 @@ delete_extent_item:
disk_bytenr, num_bytes, 0,
root->root_key.objectid,
key.objectid, key.offset -
- extent_offset, 0);
+ extent_offset);
BUG_ON(ret); /* -ENOMEM */
inode_sub_bytes(inode,
extent_end - key.offset);
@@ -1204,7 +1212,7 @@ again:
ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
root->root_key.objectid,
- ino, orig_offset, 1);
+ ino, orig_offset);
BUG_ON(ret); /* -ENOMEM */
if (split == start) {
@@ -1231,7 +1239,7 @@ again:
del_nr++;
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
- ino, orig_offset, 0);
+ ino, orig_offset);
BUG_ON(ret); /* -ENOMEM */
}
other_start = 0;
@@ -1248,7 +1256,7 @@ again:
del_nr++;
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
- ino, orig_offset, 0);
+ ino, orig_offset);
BUG_ON(ret); /* -ENOMEM */
}
if (del_nr == 0) {
@@ -1868,8 +1876,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
struct btrfs_log_ctx ctx;
int ret = 0;
bool full_sync = 0;
- const u64 len = end - start + 1;
+ u64 len;
+ /*
+	 * The range length can be represented by a u64; we have to do the
+	 * typecasts to avoid signed overflow if it's [0, LLONG_MAX], e.g. from fsync()
+ */
+ len = (u64)end - (u64)start + 1;
trace_btrfs_sync_file(file, datasync);
/*
@@ -2057,8 +2070,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
}
}
if (!full_sync) {
- ret = btrfs_wait_ordered_range(inode, start,
- end - start + 1);
+ ret = btrfs_wait_ordered_range(inode, start, len);
if (ret) {
btrfs_end_transaction(trans, root);
goto out;
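
The cast in the fsync change above matters because fsync() passes the full range
[0, LLONG_MAX], where a signed end - start + 1 overflows. A standalone
demonstration (assumed values, not btrfs code):

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	long long start = 0, end = LLONG_MAX;   /* the fsync() full range */

	/* Signed (end - start + 1) would overflow (undefined behavior);
	 * unsigned arithmetic is well defined and gives exactly 2^63. */
	uint64_t len = (uint64_t)end - (uint64_t)start + 1;

	printf("len = %llu\n", (unsigned long long)len);
	return 0;
}
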
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 611b66d73..396e3d5c4 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1294,8 +1294,14 @@ next_slot:
num_bytes = 0;
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- if (found_key.objectid > ino ||
- found_key.type > BTRFS_EXTENT_DATA_KEY ||
+ if (found_key.objectid > ino)
+ break;
+ if (WARN_ON_ONCE(found_key.objectid < ino) ||
+ found_key.type < BTRFS_EXTENT_DATA_KEY) {
+ path->slots[0]++;
+ goto next_slot;
+ }
+ if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
found_key.offset > end)
break;
@@ -2573,7 +2579,7 @@ again:
ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
new->disk_len, 0,
backref->root_id, backref->inum,
- new->file_pos, 0); /* start - extent_offset */
+ new->file_pos); /* start - extent_offset */
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto out_free_path;
@@ -4217,6 +4223,47 @@ static int truncate_space_check(struct btrfs_trans_handle *trans,
}
+static int truncate_inline_extent(struct inode *inode,
+ struct btrfs_path *path,
+ struct btrfs_key *found_key,
+ const u64 item_end,
+ const u64 new_size)
+{
+ struct extent_buffer *leaf = path->nodes[0];
+ int slot = path->slots[0];
+ struct btrfs_file_extent_item *fi;
+ u32 size = (u32)(new_size - found_key->offset);
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+
+ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+
+ if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) {
+ loff_t offset = new_size;
+ loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE);
+
+ /*
+	 * Zero out the remainder of the last page of our inline extent,
+ * instead of directly truncating our inline extent here - that
+ * would be much more complex (decompressing all the data, then
+ * compressing the truncated data, which might be bigger than
+ * the size of the inline extent, resize the extent, etc).
+ * We release the path because to get the page we might need to
+ * read the extent item from disk (data not in the page cache).
+ */
+ btrfs_release_path(path);
+ return btrfs_truncate_page(inode, offset, page_end - offset, 0);
+ }
+
+ btrfs_set_file_extent_ram_bytes(leaf, fi, size);
+ size = btrfs_file_extent_calc_inline_size(size);
+ btrfs_truncate_item(root, path, size, 1);
+
+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
+ inode_sub_bytes(inode, item_end + 1 - new_size);
+
+ return 0;
+}
+
/*
* this can truncate away extent items, csum items and directory items.
* It starts at a high offset and removes keys until it can't find
@@ -4411,27 +4458,40 @@ search_again:
* special encodings
*/
if (!del_item &&
- btrfs_file_extent_compression(leaf, fi) == 0 &&
btrfs_file_extent_encryption(leaf, fi) == 0 &&
btrfs_file_extent_other_encoding(leaf, fi) == 0) {
- u32 size = new_size - found_key.offset;
-
- if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
- inode_sub_bytes(inode, item_end + 1 -
- new_size);
/*
- * update the ram bytes to properly reflect
- * the new size of our item
+ * Need to release path in order to truncate a
+ * compressed extent. So delete any accumulated
+ * extent items so far.
*/
- btrfs_set_file_extent_ram_bytes(leaf, fi, size);
- size =
- btrfs_file_extent_calc_inline_size(size);
- btrfs_truncate_item(root, path, size, 1);
+ if (btrfs_file_extent_compression(leaf, fi) !=
+ BTRFS_COMPRESS_NONE && pending_del_nr) {
+ err = btrfs_del_items(trans, root, path,
+ pending_del_slot,
+ pending_del_nr);
+ if (err) {
+ btrfs_abort_transaction(trans,
+ root,
+ err);
+ goto error;
+ }
+ pending_del_nr = 0;
+ }
+
+ err = truncate_inline_extent(inode, path,
+ &found_key,
+ item_end,
+ new_size);
+ if (err) {
+ btrfs_abort_transaction(trans,
+ root, err);
+ goto error;
+ }
} else if (test_bit(BTRFS_ROOT_REF_COWS,
&root->state)) {
- inode_sub_bytes(inode, item_end + 1 -
- found_key.offset);
+ inode_sub_bytes(inode, item_end + 1 - new_size);
}
}
delete:
@@ -4461,7 +4521,7 @@ delete:
ret = btrfs_free_extent(trans, root, extent_start,
extent_num_bytes, 0,
btrfs_header_owner(leaf),
- ino, extent_offset, 0);
+ ino, extent_offset);
BUG_ON(ret);
if (btrfs_should_throttle_delayed_refs(trans, root))
btrfs_async_run_delayed_refs(root,
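
truncate_inline_extent() above avoids rewriting compressed inline data by zeroing
the tail of the last page instead. A hypothetical sketch of that offset arithmetic,
with PAGE_SIZE and ALIGN redefined here so it compiles standalone:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096u
#define ALIGN(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
	uint64_t new_size = 1000;                       /* truncate target   */
	uint64_t page_end = ALIGN(new_size, PAGE_SIZE); /* 4096              */

	printf("zero %llu bytes starting at offset %llu\n",
	       (unsigned long long)(page_end - new_size),
	       (unsigned long long)new_size);
	return 0;
}
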
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8d20f3b1c..6548a3682 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3203,41 +3203,6 @@ out:
return ret;
}
-/* Helper to check and see if this root currently has a ref on the given disk
- * bytenr. If it does then we need to update the quota for this root. This
- * doesn't do anything if quotas aren't enabled.
- */
-static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- u64 disko)
-{
- struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
- struct ulist *roots;
- struct ulist_iterator uiter;
- struct ulist_node *root_node = NULL;
- int ret;
-
- if (!root->fs_info->quota_enabled)
- return 1;
-
- btrfs_get_tree_mod_seq(root->fs_info, &tree_mod_seq_elem);
- ret = btrfs_find_all_roots(trans, root->fs_info, disko,
- tree_mod_seq_elem.seq, &roots);
- if (ret < 0)
- goto out;
- ret = 0;
- ULIST_ITER_INIT(&uiter);
- while ((root_node = ulist_next(roots, &uiter))) {
- if (root_node->val == root->objectid) {
- ret = 1;
- break;
- }
- }
- ulist_free(roots);
-out:
- btrfs_put_tree_mod_seq(root->fs_info, &tree_mod_seq_elem);
- return ret;
-}
-
static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
struct inode *inode,
u64 endoff,
@@ -3328,6 +3293,150 @@ static void clone_update_extent_map(struct inode *inode,
&BTRFS_I(inode)->runtime_flags);
}
+/*
+ * Make sure we do not end up inserting an inline extent into a file that
+ * already has other (non-inline) extents. If a file has an inline extent it
+ * cannot have any other extents and the (single) inline extent must start at
+ * file offset 0. Failing to respect these rules will lead to file corruption,
+ * resulting in EIO errors on read/write operations, hitting BUG_ON's in mm, etc.
+ *
+ * We can have extents that have been already written to disk or we can have
+ * dirty ranges still in delalloc, in which case the extent maps and items are
+ * created only when we run delalloc, and the delalloc ranges might fall outside
+ * the range we are currently locking in the inode's io tree. So we check the
+ * inode's i_size because of that (i_size updates are done while holding the
+ * i_mutex, which we are holding here).
+ * We also check to see if the inode has a size not greater than "datal" but has
+ * extents beyond it, due to a fallocate with FALLOC_FL_KEEP_SIZE (and we are
+ * protected against such concurrent fallocate calls by the i_mutex).
+ *
+ * If the file has no extents but a size greater than datal, do not allow the
+ * copy because we would need to turn the inline extent into a non-inline one (even
+ * with NO_HOLES enabled). If we find our destination inode only has one inline
+ * extent, just overwrite it with the source inline extent if its size is less
+ * than the source extent's size, or we could copy the source inline extent's
+ * data into the destination inode's inline extent if the latter is greater than
+ * the former.
+ */
+static int clone_copy_inline_extent(struct inode *src,
+ struct inode *dst,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct btrfs_key *new_key,
+ const u64 drop_start,
+ const u64 datal,
+ const u64 skip,
+ const u64 size,
+ char *inline_data)
+{
+ struct btrfs_root *root = BTRFS_I(dst)->root;
+ const u64 aligned_end = ALIGN(new_key->offset + datal,
+ root->sectorsize);
+ int ret;
+ struct btrfs_key key;
+
+ if (new_key->offset > 0)
+ return -EOPNOTSUPP;
+
+ key.objectid = btrfs_ino(dst);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = 0;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ return ret;
+ } else if (ret > 0) {
+ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ return ret;
+ else if (ret > 0)
+ goto copy_inline_extent;
+ }
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if (key.objectid == btrfs_ino(dst) &&
+ key.type == BTRFS_EXTENT_DATA_KEY) {
+ ASSERT(key.offset > 0);
+ return -EOPNOTSUPP;
+ }
+ } else if (i_size_read(dst) <= datal) {
+ struct btrfs_file_extent_item *ei;
+ u64 ext_len;
+
+ /*
+ * If the file size is <= datal, make sure there are no other
+		 * extents following (can happen due to a fallocate call with
+ * the flag FALLOC_FL_KEEP_SIZE).
+ */
+ ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_file_extent_item);
+ /*
+ * If it's an inline extent, it can not have other extents
+ * following it.
+ */
+ if (btrfs_file_extent_type(path->nodes[0], ei) ==
+ BTRFS_FILE_EXTENT_INLINE)
+ goto copy_inline_extent;
+
+ ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
+ if (ext_len > aligned_end)
+ return -EOPNOTSUPP;
+
+ ret = btrfs_next_item(root, path);
+ if (ret < 0) {
+ return ret;
+ } else if (ret == 0) {
+ btrfs_item_key_to_cpu(path->nodes[0], &key,
+ path->slots[0]);
+ if (key.objectid == btrfs_ino(dst) &&
+ key.type == BTRFS_EXTENT_DATA_KEY)
+ return -EOPNOTSUPP;
+ }
+ }
+
+copy_inline_extent:
+ /*
+ * We have no extent items, or we have an extent at offset 0 which may
+	 * or may not be inlined. All these cases are dealt with in the same way.
+ */
+ if (i_size_read(dst) > datal) {
+ /*
+ * If the destination inode has an inline extent...
+ * This would require copying the data from the source inline
+ * extent into the beginning of the destination's inline extent.
+ * But this is really complex, both extents can be compressed
+ * or just one of them, which would require decompressing and
+ * re-compressing data (which could increase the new compressed
+ * size, not allowing the compressed data to fit anymore in an
+ * inline extent).
+ * So just don't support this case for now (it should be rare,
+ * we are not really saving space when cloning inline extents).
+ */
+ return -EOPNOTSUPP;
+ }
+
+ btrfs_release_path(path);
+ ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1);
+ if (ret)
+ return ret;
+ ret = btrfs_insert_empty_item(trans, root, path, new_key, size);
+ if (ret)
+ return ret;
+
+ if (skip) {
+ const u32 start = btrfs_file_extent_calc_inline_size(0);
+
+ memmove(inline_data + start, inline_data + start + skip, datal);
+ }
+
+ write_extent_buffer(path->nodes[0], inline_data,
+ btrfs_item_ptr_offset(path->nodes[0],
+ path->slots[0]),
+ size);
+ inode_add_bytes(dst, datal);
+
+ return 0;
+}
+
/**
* btrfs_clone() - clone a range from inode file to another
*
@@ -3352,9 +3461,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
u32 nritems;
int slot;
int ret;
- int no_quota;
const u64 len = olen_aligned;
- u64 last_disko = 0;
u64 last_dest_end = destoff;
ret = -ENOMEM;
@@ -3400,7 +3507,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
nritems = btrfs_header_nritems(path->nodes[0]);
process_slot:
- no_quota = 1;
if (path->slots[0] >= nritems) {
ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
if (ret < 0)
@@ -3552,35 +3658,13 @@ process_slot:
btrfs_set_file_extent_num_bytes(leaf, extent,
datal);
- /*
- * We need to look up the roots that point at
- * this bytenr and see if the new root does. If
- * it does not we need to make sure we update
- * quotas appropriately.
- */
- if (disko && root != BTRFS_I(src)->root &&
- disko != last_disko) {
- no_quota = check_ref(trans, root,
- disko);
- if (no_quota < 0) {
- btrfs_abort_transaction(trans,
- root,
- ret);
- btrfs_end_transaction(trans,
- root);
- ret = no_quota;
- goto out;
- }
- }
-
if (disko) {
inode_add_bytes(inode, datal);
ret = btrfs_inc_extent_ref(trans, root,
disko, diskl, 0,
root->root_key.objectid,
btrfs_ino(inode),
- new_key.offset - datao,
- no_quota);
+ new_key.offset - datao);
if (ret) {
btrfs_abort_transaction(trans,
root,
@@ -3594,21 +3678,6 @@ process_slot:
} else if (type == BTRFS_FILE_EXTENT_INLINE) {
u64 skip = 0;
u64 trim = 0;
- u64 aligned_end = 0;
-
- /*
- * Don't copy an inline extent into an offset
- * greater than zero. Having an inline extent
- * at such an offset results in chaos as btrfs
- * isn't prepared for such cases. Just skip
- * this case for the same reasons as commented
- * at btrfs_ioctl_clone().
- */
- if (last_dest_end > 0) {
- ret = -EOPNOTSUPP;
- btrfs_end_transaction(trans, root);
- goto out;
- }
if (off > key.offset) {
skip = off - key.offset;
@@ -3626,42 +3695,22 @@ process_slot:
size -= skip + trim;
datal -= skip + trim;
- aligned_end = ALIGN(new_key.offset + datal,
- root->sectorsize);
- ret = btrfs_drop_extents(trans, root, inode,
- drop_start,
- aligned_end,
- 1);
+ ret = clone_copy_inline_extent(src, inode,
+ trans, path,
+ &new_key,
+ drop_start,
+ datal,
+ skip, size, buf);
if (ret) {
if (ret != -EOPNOTSUPP)
btrfs_abort_transaction(trans,
- root, ret);
- btrfs_end_transaction(trans, root);
- goto out;
- }
-
- ret = btrfs_insert_empty_item(trans, root, path,
- &new_key, size);
- if (ret) {
- btrfs_abort_transaction(trans, root,
- ret);
+ root,
+ ret);
btrfs_end_transaction(trans, root);
goto out;
}
-
- if (skip) {
- u32 start =
- btrfs_file_extent_calc_inline_size(0);
- memmove(buf+start, buf+start+skip,
- datal);
- }
-
leaf = path->nodes[0];
slot = path->slots[0];
- write_extent_buffer(leaf, buf,
- btrfs_item_ptr_offset(leaf, slot),
- size);
- inode_add_bytes(inode, datal);
}
/* If we have an implicit hole (NO_HOLES feature). */
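
The long comment in clone_copy_inline_extent() boils down to two placement rules.
A hedged standalone sketch; can_clone_inline() is a made-up name, and the real
function also walks the destination's extent items rather than trusting i_size
alone:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

static int can_clone_inline(uint64_t dest_offset, uint64_t dest_isize,
			    uint64_t datal)
{
	if (dest_offset > 0)
		return -EOPNOTSUPP;  /* inline data must live at offset 0   */
	if (dest_isize > datal)
		return -EOPNOTSUPP;  /* would need a non-inline conversion  */
	return 0;
}

int main(void)
{
	printf("%d %d %d\n",
	       can_clone_inline(0, 100, 4096),    /* allowed              */
	       can_clone_inline(8192, 100, 4096), /* rejected: bad offset */
	       can_clone_inline(0, 8192, 4096));  /* rejected: dst larger */
	return 0;
}
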
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 303babeef..ab507e3d5 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1716,7 +1716,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
num_bytes, parent,
btrfs_header_owner(leaf),
- key.objectid, key.offset, 1);
+ key.objectid, key.offset);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
break;
@@ -1724,7 +1724,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
parent, btrfs_header_owner(leaf),
- key.objectid, key.offset, 1);
+ key.objectid, key.offset);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
break;
@@ -1900,23 +1900,21 @@ again:
ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
path->nodes[level]->start,
- src->root_key.objectid, level - 1, 0,
- 1);
+ src->root_key.objectid, level - 1, 0);
BUG_ON(ret);
ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
0, dest->root_key.objectid, level - 1,
- 0, 1);
+ 0);
BUG_ON(ret);
ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
path->nodes[level]->start,
- src->root_key.objectid, level - 1, 0,
- 1);
+ src->root_key.objectid, level - 1, 0);
BUG_ON(ret);
ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
0, dest->root_key.objectid, level - 1,
- 0, 1);
+ 0);
BUG_ON(ret);
btrfs_unlock_up_safe(path, 0);
@@ -2745,7 +2743,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
node->eb->start, blocksize,
upper->eb->start,
btrfs_header_owner(upper->eb),
- node->level, 0, 1);
+ node->level, 0);
BUG_ON(ret);
ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index a739b825b..23bb2e4b9 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -2353,8 +2353,14 @@ static int send_subvol_begin(struct send_ctx *sctx)
}
TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
- TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
- sctx->send_root->root_item.uuid);
+
+ if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid))
+ TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
+ sctx->send_root->root_item.received_uuid);
+ else
+ TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
+ sctx->send_root->root_item.uuid);
+
TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
le64_to_cpu(sctx->send_root->root_item.ctransid));
if (parent_root) {
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1bbaace73..6f8af2de5 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -691,7 +691,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
ret = btrfs_inc_extent_ref(trans, root,
ins.objectid, ins.offset,
0, root->root_key.objectid,
- key->objectid, offset, 0);
+ key->objectid, offset);
if (ret)
goto out;
} else {
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 6f518c90e..1fcd7b6e7 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -313,8 +313,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
/* check to make sure this item is what we want */
if (found_key.objectid != key.objectid)
break;
- if (found_key.type != BTRFS_XATTR_ITEM_KEY)
+ if (found_key.type > BTRFS_XATTR_ITEM_KEY)
break;
+ if (found_key.type < BTRFS_XATTR_ITEM_KEY)
+ goto next;
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
if (verify_dir_item(root, leaf, di))
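
The xattr change replaces an equality test with range checks because tree items
are sorted by (objectid, type, offset): a smaller type means xattr items may
still follow, a larger type means none can. An illustrative loop over a sorted
type column; the key values match ctree.h, the rest is hypothetical:

#include <stdio.h>

#define INODE_ITEM_KEY 1
#define XATTR_ITEM_KEY 24
#define EXTENT_DATA_KEY 108

int main(void)
{
	int types[] = { INODE_ITEM_KEY, XATTR_ITEM_KEY, XATTR_ITEM_KEY,
			EXTENT_DATA_KEY };

	for (int i = 0; i < 4; i++) {
		if (types[i] > XATTR_ITEM_KEY)
			break;              /* past all xattrs: stop */
		if (types[i] < XATTR_ITEM_KEY)
			continue;           /* not there yet: skip   */
		printf("process xattr item at slot %d\n", i);
	}
	return 0;
}
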
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 51cb02da7..fe2c98276 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1935,7 +1935,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
len = sizeof(*head) +
pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
- sizeof(struct timespec);
+ sizeof(struct ceph_timespec);
/* calculate (max) length for cap releases */
len += sizeof(struct ceph_mds_request_release) *
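
The ceph fix swaps sizeof(struct timespec), an in-memory layout that is 16 bytes
on 64-bit, for sizeof(struct ceph_timespec), the packed 8-byte wire encoding.
A standalone illustration; demo_ceph_timespec mirrors the wire struct but is
redefined here so the program is self-contained:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

struct demo_ceph_timespec {
	uint32_t tv_sec;
	uint32_t tv_nsec;
} __attribute__((packed));

int main(void)
{
	printf("sizeof(struct timespec)    = %zu\n", sizeof(struct timespec));
	printf("sizeof(wire ceph_timespec) = %zu\n",
	       sizeof(struct demo_ceph_timespec));
	return 0;
}
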
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index c711be8d6..9c8d23316 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -271,8 +271,12 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
dput(dentry);
dentry = ERR_PTR(-EEXIST);
}
- if (IS_ERR(dentry))
+
+ if (IS_ERR(dentry)) {
mutex_unlock(&d_inode(parent)->i_mutex);
+ simple_release_fs(&debugfs_mount, &debugfs_mount_count);
+ }
+
return dentry;
}
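
The debugfs fix adds the missing simple_release_fs() on the error path so the
mount pinned earlier in start_creating() is dropped when dentry creation fails.
The general pattern, as a hedged toy example:

#include <stdio.h>

static int refcount;

static void get_fs(void) { refcount++; }
static void put_fs(void) { refcount--; }

static int create(int should_fail)
{
	get_fs();
	if (should_fail) {
		put_fs();   /* the missing release this patch adds */
		return -1;
	}
	return 0;
}

int main(void)
{
	create(1);
	printf("refcount after failed create: %d\n", refcount); /* 0 */
	return 0;
}
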
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index 457315581..2fab243a4 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -411,7 +411,13 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
ext4_lblk_t lblk = ex->ee_block;
ext4_fsblk_t pblk = ext4_ext_pblock(ex);
unsigned int len = ext4_ext_get_actual_len(ex);
- int err = 0;
+ int ret, err = 0;
+
+#if 0
+ ext4_msg(inode->i_sb, KERN_CRIT,
+ "ext4_encrypted_zeroout ino %lu lblk %u len %u",
+ (unsigned long) inode->i_ino, lblk, len);
+#endif
BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE);
@@ -437,17 +443,26 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
goto errout;
}
bio->bi_bdev = inode->i_sb->s_bdev;
- bio->bi_iter.bi_sector = pblk;
- err = bio_add_page(bio, ciphertext_page,
+ bio->bi_iter.bi_sector =
+ pblk << (inode->i_sb->s_blocksize_bits - 9);
+ ret = bio_add_page(bio, ciphertext_page,
inode->i_sb->s_blocksize, 0);
- if (err) {
+ if (ret != inode->i_sb->s_blocksize) {
+ /* should never happen! */
+ ext4_msg(inode->i_sb, KERN_ERR,
+ "bio_add_page failed: %d", ret);
+ WARN_ON(1);
bio_put(bio);
+ err = -EIO;
goto errout;
}
err = submit_bio_wait(WRITE, bio);
+ if ((err == 0) && bio->bi_error)
+ err = -EIO;
bio_put(bio);
if (err)
goto errout;
+ lblk++; pblk++;
}
err = 0;
errout:
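
Two of the ext4_encrypted_zeroout() fixes are arithmetic: bi_sector is measured
in 512-byte sectors, not filesystem blocks, and lblk/pblk must advance on every
loop iteration. A standalone check of the shift, with assumed values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned blocksize_bits = 12;   /* 4096-byte blocks        */
	uint64_t pblk = 1000;           /* filesystem block number */
	uint64_t sector = pblk << (blocksize_bits - 9);

	printf("block %llu -> sector %llu\n",
	       (unsigned long long)pblk,
	       (unsigned long long)sector);   /* 1000 -> 8000 */
	return 0;
}
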
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index d41843181..e770c1ee4 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -88,13 +88,13 @@ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
return 0;
}
+ err = handle->h_err;
if (!handle->h_transaction) {
- err = jbd2_journal_stop(handle);
- return handle->h_err ? handle->h_err : err;
+ rc = jbd2_journal_stop(handle);
+ return err ? err : rc;
}
sb = handle->h_transaction->t_journal->j_private;
- err = handle->h_err;
rc = jbd2_journal_stop(handle);
if (!err)
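
The ext4_jbd2.c reordering reads handle->h_err before jbd2_journal_stop(), which
may free the handle. A toy reproduction of the use-after-free class being
avoided; all names here are hypothetical:

#include <stdio.h>
#include <stdlib.h>

struct handle { int h_err; };

static int journal_stop(struct handle *h)
{
	free(h);          /* the handle is gone after this call */
	return 0;
}

int main(void)
{
	struct handle *h = calloc(1, sizeof(*h));

	h->h_err = -5;
	int err = h->h_err;        /* save first...               */
	int rc  = journal_stop(h); /* ...then let the handle die  */

	printf("%d\n", err ? err : rc);
	return 0;
}
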
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2553aa8b6..7f486e350 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3558,6 +3558,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
max_zeroout = sbi->s_extent_max_zeroout_kb >>
(inode->i_sb->s_blocksize_bits - 10);
+ if (ext4_encrypted_inode(inode))
+ max_zeroout = 0;
+
/* If extent is less than s_max_zeroout_kb, zeroout directly */
if (max_zeroout && (ee_len <= max_zeroout)) {
err = ext4_ext_zeroout(inode, ex);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 84ba4d2b3..17fbe3882 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -425,6 +425,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
struct buffer_head *bh, *head;
int ret = 0;
int nr_submitted = 0;
+ int nr_to_submit = 0;
blocksize = 1 << inode->i_blkbits;
@@ -477,11 +478,13 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
}
set_buffer_async_write(bh);
+ nr_to_submit++;
} while ((bh = bh->b_this_page) != head);
bh = head = page_buffers(page);
- if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
+ if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode) &&
+ nr_to_submit) {
data_page = ext4_encrypt(inode, page);
if (IS_ERR(data_page)) {
ret = PTR_ERR(data_page);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a63c7b0a1..df84bd256 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -394,9 +394,13 @@ static void ext4_handle_error(struct super_block *sb)
smp_wmb();
sb->s_flags |= MS_RDONLY;
}
- if (test_opt(sb, ERRORS_PANIC))
+ if (test_opt(sb, ERRORS_PANIC)) {
+ if (EXT4_SB(sb)->s_journal &&
+ !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
+ return;
panic("EXT4-fs (device %s): panic forced after error\n",
sb->s_id);
+ }
}
#define ext4_error_ratelimit(sb) \
@@ -585,8 +589,12 @@ void __ext4_abort(struct super_block *sb, const char *function,
jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
save_error_info(sb, function, line);
}
- if (test_opt(sb, ERRORS_PANIC))
+ if (test_opt(sb, ERRORS_PANIC)) {
+ if (EXT4_SB(sb)->s_journal &&
+ !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
+ return;
panic("EXT4-fs panic from previous error\n");
+ }
}
void __ext4_msg(struct super_block *sb,
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 8270fe9e3..37023d0bd 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2071,8 +2071,12 @@ static void __journal_abort_soft (journal_t *journal, int errno)
__jbd2_journal_abort_hard(journal);
- if (errno)
+ if (errno) {
jbd2_journal_update_sb_errno(journal);
+ write_lock(&journal->j_state_lock);
+ journal->j_flags |= JBD2_REC_ERR;
+ write_unlock(&journal->j_state_lock);
+ }
}
/**
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 326d9e10d..ffdf9b9e8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1824,7 +1824,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0)
nfsi->attr_gencount = fattr->gencount;
}
- invalid &= ~NFS_INO_INVALID_ATTR;
+
+ /* Don't declare attrcache up to date if there were no attrs! */
+ if (fattr->valid != 0)
+ invalid &= ~NFS_INO_INVALID_ATTR;
+
/* Don't invalidate the data if we were to blame */
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
|| S_ISLNK(inode->i_mode)))
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 223bedda6..10410e8b5 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -33,7 +33,7 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
return ret;
idr_preload(GFP_KERNEL);
spin_lock(&nn->nfs_client_lock);
- ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT);
+ ret = idr_alloc(&nn->cb_ident_idr, clp, 1, 0, GFP_NOWAIT);
if (ret >= 0)
clp->cl_cb_ident = ret;
spin_unlock(&nn->nfs_client_lock);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 8abe27165..abf5caea2 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -872,33 +872,38 @@ send_layoutget(struct pnfs_layout_hdr *lo,
dprintk("--> %s\n", __func__);
- lgp = kzalloc(sizeof(*lgp), gfp_flags);
- if (lgp == NULL)
- return NULL;
+ /*
+ * Synchronously retrieve layout information from server and
+ * store in lseg. If we race with a concurrent seqid morphing
+ * op, then re-send the LAYOUTGET.
+ */
+ do {
+ lgp = kzalloc(sizeof(*lgp), gfp_flags);
+ if (lgp == NULL)
+ return NULL;
+
+ i_size = i_size_read(ino);
+
+ lgp->args.minlength = PAGE_CACHE_SIZE;
+ if (lgp->args.minlength > range->length)
+ lgp->args.minlength = range->length;
+ if (range->iomode == IOMODE_READ) {
+ if (range->offset >= i_size)
+ lgp->args.minlength = 0;
+ else if (i_size - range->offset < lgp->args.minlength)
+ lgp->args.minlength = i_size - range->offset;
+ }
+ lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
+ lgp->args.range = *range;
+ lgp->args.type = server->pnfs_curr_ld->id;
+ lgp->args.inode = ino;
+ lgp->args.ctx = get_nfs_open_context(ctx);
+ lgp->gfp_flags = gfp_flags;
+ lgp->cred = lo->plh_lc_cred;
- i_size = i_size_read(ino);
+ lseg = nfs4_proc_layoutget(lgp, gfp_flags);
+ } while (lseg == ERR_PTR(-EAGAIN));
- lgp->args.minlength = PAGE_CACHE_SIZE;
- if (lgp->args.minlength > range->length)
- lgp->args.minlength = range->length;
- if (range->iomode == IOMODE_READ) {
- if (range->offset >= i_size)
- lgp->args.minlength = 0;
- else if (i_size - range->offset < lgp->args.minlength)
- lgp->args.minlength = i_size - range->offset;
- }
- lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
- lgp->args.range = *range;
- lgp->args.type = server->pnfs_curr_ld->id;
- lgp->args.inode = ino;
- lgp->args.ctx = get_nfs_open_context(ctx);
- lgp->gfp_flags = gfp_flags;
- lgp->cred = lo->plh_lc_cred;
-
- /* Synchronously retrieve layout information from server and
- * store in lseg.
- */
- lseg = nfs4_proc_layoutget(lgp, gfp_flags);
if (IS_ERR(lseg)) {
switch (PTR_ERR(lseg)) {
case -ENOMEM:
@@ -1687,6 +1692,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
/* existing state ID, make sure the sequence number matches. */
if (pnfs_layout_stateid_blocked(lo, &res->stateid)) {
dprintk("%s forget reply due to sequence\n", __func__);
+ status = -EAGAIN;
goto out_forget_reply;
}
pnfs_set_layout_stateid(lo, &res->stateid, false);
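
pnfs_layout_process() now returns -EAGAIN through out_forget_reply, and
send_layoutget() rebuilds and re-sends the LAYOUTGET until the seqid race
clears. The control flow, reduced to a runnable sketch where layoutget() is a
stand-in for nfs4_proc_layoutget():

#include <errno.h>
#include <stdio.h>

static int attempts;

/* Stand-in for nfs4_proc_layoutget(): fails once with -EAGAIN. */
static int layoutget(void)
{
	return (attempts++ == 0) ? -EAGAIN : 0;
}

int main(void)
{
	int ret;

	do {
		/* allocate and fill a fresh request on each pass */
		ret = layoutget();
	} while (ret == -EAGAIN);

	printf("succeeded after %d attempts\n", attempts);
	return 0;
}
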
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0f1d5691b..0dea0c254 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -765,16 +765,68 @@ void nfs4_unhash_stid(struct nfs4_stid *s)
s->sc_type = 0;
}
-static void
+/**
+ * nfs4_get_existing_delegation - Discover if this delegation already exists
+ * @clp: a pointer to the nfs4_client we're granting a delegation to
+ * @fp: a pointer to the nfs4_file we're granting a delegation on
+ *
+ * Return:
+ * On success: 0 if an existing delegation was not found.
+ *
+ * On error: -EAGAIN if one was previously granted to this nfs4_client
+ * for this nfs4_file.
+ *
+ */
+
+static int
+nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp)
+{
+ struct nfs4_delegation *searchdp = NULL;
+ struct nfs4_client *searchclp = NULL;
+
+ lockdep_assert_held(&state_lock);
+ lockdep_assert_held(&fp->fi_lock);
+
+ list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) {
+ searchclp = searchdp->dl_stid.sc_client;
+ if (clp == searchclp) {
+ return -EAGAIN;
+ }
+ }
+ return 0;
+}
+
+/**
+ * hash_delegation_locked - Add a delegation to the appropriate lists
+ * @dp: a pointer to the nfs4_delegation we are adding.
+ * @fp: a pointer to the nfs4_file we're granting a delegation on
+ *
+ * Return:
+ * On success: 0 if the delegation was successfully hashed.
+ *
+ * On error: -EAGAIN if one was previously granted to this
+ * nfs4_client for this nfs4_file. Delegation is not hashed.
+ *
+ */
+
+static int
hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
{
+ int status;
+ struct nfs4_client *clp = dp->dl_stid.sc_client;
+
lockdep_assert_held(&state_lock);
lockdep_assert_held(&fp->fi_lock);
+ status = nfs4_get_existing_delegation(clp, fp);
+ if (status)
+ return status;
+ ++fp->fi_delegees;
atomic_inc(&dp->dl_stid.sc_count);
dp->dl_stid.sc_type = NFS4_DELEG_STID;
list_add(&dp->dl_perfile, &fp->fi_delegations);
- list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
+ list_add(&dp->dl_perclnt, &clp->cl_delegations);
+ return 0;
}
static bool
@@ -3360,6 +3412,7 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
stp->st_access_bmap = 0;
stp->st_deny_bmap = 0;
stp->st_openstp = NULL;
+ init_rwsem(&stp->st_rwsem);
spin_lock(&oo->oo_owner.so_client->cl_lock);
list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
spin_lock(&fp->fi_lock);
@@ -3945,6 +3998,18 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag)
return fl;
}
+/**
+ * nfs4_setlease - Obtain a delegation by requesting lease from vfs layer
+ * @dp: a pointer to the nfs4_delegation we're adding.
+ *
+ * Return:
+ * On success: return code will be 0.
+ *
+ * On error: -EAGAIN if there was an existing delegation.
+ * nonzero if there is an error in other cases.
+ *
+ */
+
static int nfs4_setlease(struct nfs4_delegation *dp)
{
struct nfs4_file *fp = dp->dl_stid.sc_file;
@@ -3976,16 +4041,19 @@ static int nfs4_setlease(struct nfs4_delegation *dp)
goto out_unlock;
/* Race breaker */
if (fp->fi_deleg_file) {
- status = 0;
- ++fp->fi_delegees;
- hash_delegation_locked(dp, fp);
+ status = hash_delegation_locked(dp, fp);
goto out_unlock;
}
fp->fi_deleg_file = filp;
- fp->fi_delegees = 1;
- hash_delegation_locked(dp, fp);
+ fp->fi_delegees = 0;
+ status = hash_delegation_locked(dp, fp);
spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock);
+ if (status) {
+ /* Should never happen, this is a new fi_deleg_file */
+ WARN_ON_ONCE(1);
+ goto out_fput;
+ }
return 0;
out_unlock:
spin_unlock(&fp->fi_lock);
@@ -4005,6 +4073,15 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
if (fp->fi_had_conflict)
return ERR_PTR(-EAGAIN);
+ spin_lock(&state_lock);
+ spin_lock(&fp->fi_lock);
+ status = nfs4_get_existing_delegation(clp, fp);
+ spin_unlock(&fp->fi_lock);
+ spin_unlock(&state_lock);
+
+ if (status)
+ return ERR_PTR(status);
+
dp = alloc_init_deleg(clp, fh, odstate);
if (!dp)
return ERR_PTR(-ENOMEM);
@@ -4023,9 +4100,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
status = -EAGAIN;
goto out_unlock;
}
- ++fp->fi_delegees;
- hash_delegation_locked(dp, fp);
- status = 0;
+ status = hash_delegation_locked(dp, fp);
out_unlock:
spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock);
@@ -4187,15 +4262,20 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
*/
if (stp) {
/* Stateid was found, this is an OPEN upgrade */
+ down_read(&stp->st_rwsem);
status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
- if (status)
+ if (status) {
+ up_read(&stp->st_rwsem);
goto out;
+ }
} else {
stp = open->op_stp;
open->op_stp = NULL;
init_open_stateid(stp, fp, open);
+ down_read(&stp->st_rwsem);
status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
if (status) {
+ up_read(&stp->st_rwsem);
release_open_stateid(stp);
goto out;
}
@@ -4207,6 +4287,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
}
update_stateid(&stp->st_stid.sc_stateid);
memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+ up_read(&stp->st_rwsem);
if (nfsd4_has_session(&resp->cstate)) {
if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
@@ -4819,10 +4900,13 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
* revoked delegations are kept only for free_stateid.
*/
return nfserr_bad_stateid;
+ down_write(&stp->st_rwsem);
status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
- if (status)
- return status;
- return nfs4_check_fh(current_fh, &stp->st_stid);
+ if (status == nfs_ok)
+ status = nfs4_check_fh(current_fh, &stp->st_stid);
+ if (status != nfs_ok)
+ up_write(&stp->st_rwsem);
+ return status;
}
/*
@@ -4869,6 +4953,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
return status;
oo = openowner(stp->st_stateowner);
if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
+ up_write(&stp->st_rwsem);
nfs4_put_stid(&stp->st_stid);
return nfserr_bad_stateid;
}
@@ -4899,11 +4984,14 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
oo = openowner(stp->st_stateowner);
status = nfserr_bad_stateid;
- if (oo->oo_flags & NFS4_OO_CONFIRMED)
+ if (oo->oo_flags & NFS4_OO_CONFIRMED) {
+ up_write(&stp->st_rwsem);
goto put_stateid;
+ }
oo->oo_flags |= NFS4_OO_CONFIRMED;
update_stateid(&stp->st_stid.sc_stateid);
memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+ up_write(&stp->st_rwsem);
dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
__func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
@@ -4982,6 +5070,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
status = nfs_ok;
put_stateid:
+ up_write(&stp->st_rwsem);
nfs4_put_stid(&stp->st_stid);
out:
nfsd4_bump_seqid(cstate, status);
@@ -5035,6 +5124,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
update_stateid(&stp->st_stid.sc_stateid);
memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+ up_write(&stp->st_rwsem);
nfsd4_close_open_stateid(stp);
@@ -5260,6 +5350,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
stp->st_access_bmap = 0;
stp->st_deny_bmap = open_stp->st_deny_bmap;
stp->st_openstp = open_stp;
+ init_rwsem(&stp->st_rwsem);
list_add(&stp->st_locks, &open_stp->st_locks);
list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
spin_lock(&fp->fi_lock);
@@ -5428,6 +5519,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&open_stp, nn);
if (status)
goto out;
+ up_write(&open_stp->st_rwsem);
open_sop = openowner(open_stp->st_stateowner);
status = nfserr_bad_stateid;
if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
@@ -5435,6 +5527,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
status = lookup_or_create_lock_state(cstate, open_stp, lock,
&lock_stp, &new);
+ if (status == nfs_ok)
+ down_write(&lock_stp->st_rwsem);
} else {
status = nfs4_preprocess_seqid_op(cstate,
lock->lk_old_lock_seqid,
@@ -5540,6 +5634,8 @@ out:
seqid_mutating_err(ntohl(status)))
lock_sop->lo_owner.so_seqid++;
+ up_write(&lock_stp->st_rwsem);
+
/*
* If this is a new, never-before-used stateid, and we are
* returning an error, then just go ahead and release it.
@@ -5709,6 +5805,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fput:
fput(filp);
put_stateid:
+ up_write(&stp->st_rwsem);
nfs4_put_stid(&stp->st_stid);
out:
nfsd4_bump_seqid(cstate, status);
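
The new st_rwsem gives each open/lock stateid a reader-writer discipline: OPEN
takes it shared, while seqid-mutating operations (open_confirm, open_downgrade,
close, lock, locku) take it exclusive before bumping the stateid sequence
number. A pthread analogue, purely illustrative of the locking shape:

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t st_rwsem = PTHREAD_RWLOCK_INITIALIZER;
static unsigned int seqid;

static void open_op(void)            /* nfsd4_process_open2() analogue */
{
	pthread_rwlock_rdlock(&st_rwsem);
	/* ... upgrade access, copy out the stateid ... */
	pthread_rwlock_unlock(&st_rwsem);
}

static void seqid_mutating_op(void)  /* close/lock/downgrade analogue */
{
	pthread_rwlock_wrlock(&st_rwsem);
	seqid++;                     /* exclusive: no racing bump */
	pthread_rwlock_unlock(&st_rwsem);
}

int main(void)
{
	open_op();
	seqid_mutating_op();
	printf("seqid = %u\n", seqid);
	return 0;
}
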
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 583ffc13c..31bde12fe 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -534,15 +534,16 @@ struct nfs4_file {
* Better suggestions welcome.
*/
struct nfs4_ol_stateid {
- struct nfs4_stid st_stid; /* must be first field */
- struct list_head st_perfile;
- struct list_head st_perstateowner;
- struct list_head st_locks;
- struct nfs4_stateowner * st_stateowner;
- struct nfs4_clnt_odstate * st_clnt_odstate;
- unsigned char st_access_bmap;
- unsigned char st_deny_bmap;
- struct nfs4_ol_stateid * st_openstp;
+ struct nfs4_stid st_stid;
+ struct list_head st_perfile;
+ struct list_head st_perstateowner;
+ struct list_head st_locks;
+ struct nfs4_stateowner *st_stateowner;
+ struct nfs4_clnt_odstate *st_clnt_odstate;
+ unsigned char st_access_bmap;
+ unsigned char st_deny_bmap;
+ struct nfs4_ol_stateid *st_openstp;
+ struct rw_semaphore st_rwsem;
};
static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b7dfac226..12bfa9ca5 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -374,6 +374,8 @@ static int ocfs2_mknod(struct inode *dir,
mlog_errno(status);
goto leave;
}
+	/* update inode->i_mode now that the mode has been masked with the umask */
+ inode->i_mode = mode;
handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,
S_ISDIR(mode),