From 8d91c1e411f55d7ea91b1183a2e9f8088fb4d5be Mon Sep 17 00:00:00 2001 From: André Fabian Silva Delgado Date: Tue, 15 Dec 2015 14:52:16 -0300 Subject: Linux-libre 4.3.2-gnu --- fs/xfs/Makefile | 2 +- fs/xfs/libxfs/xfs_alloc.c | 6 +- fs/xfs/libxfs/xfs_alloc_btree.c | 4 +- fs/xfs/libxfs/xfs_attr.c | 2 + fs/xfs/libxfs/xfs_attr_leaf.c | 4 +- fs/xfs/libxfs/xfs_attr_remote.c | 9 +- fs/xfs/libxfs/xfs_bit.c | 118 ++++++++++++++++++++ fs/xfs/libxfs/xfs_bmap.c | 1 + fs/xfs/libxfs/xfs_bmap_btree.c | 5 +- fs/xfs/libxfs/xfs_btree.c | 10 +- fs/xfs/libxfs/xfs_da_btree.c | 32 +++--- fs/xfs/libxfs/xfs_dir2.c | 36 +++---- fs/xfs/libxfs/xfs_dir2_block.c | 4 +- fs/xfs/libxfs/xfs_dir2_data.c | 4 +- fs/xfs/libxfs/xfs_dir2_leaf.c | 4 +- fs/xfs/libxfs/xfs_dir2_node.c | 7 +- fs/xfs/libxfs/xfs_dquot_buf.c | 4 +- fs/xfs/libxfs/xfs_format.h | 22 +++- fs/xfs/libxfs/xfs_ialloc.c | 7 +- fs/xfs/libxfs/xfs_ialloc_btree.c | 2 +- fs/xfs/libxfs/xfs_inode_buf.c | 4 +- fs/xfs/libxfs/xfs_sb.c | 27 +++-- fs/xfs/libxfs/xfs_symlink_remote.c | 4 +- fs/xfs/xfs_aops.c | 16 ++- fs/xfs/xfs_bit.c | 118 -------------------- fs/xfs/xfs_bmap_util.c | 87 ++++++++------- fs/xfs/xfs_buf.c | 16 ++- fs/xfs/xfs_buf.h | 1 + fs/xfs/xfs_buf_item.c | 26 ++--- fs/xfs/xfs_buf_item.h | 2 +- fs/xfs/xfs_dir2_readdir.c | 11 +- fs/xfs/xfs_dquot.c | 10 +- fs/xfs/xfs_extfree_item.c | 105 ++++++++---------- fs/xfs/xfs_extfree_item.h | 26 ++++- fs/xfs/xfs_file.c | 81 ++++++++++---- fs/xfs/xfs_fsops.c | 6 +- fs/xfs/xfs_icache.c | 2 + fs/xfs/xfs_inode.c | 141 ++++++++++++++++-------- fs/xfs/xfs_inode.h | 85 ++++++++++----- fs/xfs/xfs_inode_item.c | 11 +- fs/xfs/xfs_iops.c | 8 +- fs/xfs/xfs_itable.c | 3 +- fs/xfs/xfs_log.c | 87 +++++++++------ fs/xfs/xfs_log.h | 1 + fs/xfs/xfs_log_cil.c | 8 +- fs/xfs/xfs_log_priv.h | 2 + fs/xfs/xfs_log_recover.c | 216 ++++++++++++++++++++++++++++--------- fs/xfs/xfs_mount.c | 28 ++--- fs/xfs/xfs_rtalloc.c | 57 +++++----- fs/xfs/xfs_super.c | 16 ++- fs/xfs/xfs_symlink.c | 9 +- fs/xfs/xfs_trace.h | 35 ++++++ fs/xfs/xfs_trans.c | 15 ++- fs/xfs/xfs_trans.h | 9 +- fs/xfs/xfs_trans_extfree.c | 32 ++++-- fs/xfs/xfs_trans_priv.h | 15 +++ 56 files changed, 997 insertions(+), 606 deletions(-) create mode 100644 fs/xfs/libxfs/xfs_bit.c delete mode 100644 fs/xfs/xfs_bit.c (limited to 'fs/xfs') diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index df6828570..a096841bd 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -33,6 +33,7 @@ xfs-y += $(addprefix libxfs/, \ xfs_attr.o \ xfs_attr_leaf.o \ xfs_attr_remote.o \ + xfs_bit.o \ xfs_bmap.o \ xfs_bmap_btree.o \ xfs_btree.o \ @@ -63,7 +64,6 @@ xfs-$(CONFIG_XFS_RT) += $(addprefix libxfs/, \ xfs-y += xfs_aops.o \ xfs_attr_inactive.o \ xfs_attr_list.o \ - xfs_bit.o \ xfs_bmap_util.o \ xfs_buf.o \ xfs_dir2_readdir.o \ diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index f9e9ffe6f..ffad7f203 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -464,7 +464,7 @@ xfs_agfl_verify( struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); int i; - if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_uuid)) + if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid)) return false; if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC) return false; @@ -1937,7 +1937,7 @@ xfs_alloc_fix_freelist( struct xfs_alloc_arg targs; /* local allocation arguments */ xfs_agblock_t bno; /* freelist block */ xfs_extlen_t need; /* total blocks needed in freelist */ - int error; + int error = 0; if (!pag->pagf_init) { error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp); @@ -2260,7 +2260,7 @@ xfs_agf_verify( struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); if (xfs_sb_version_hascrc(&mp->m_sb) && - !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_uuid)) + !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) return false; if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 59d521c09..90de071dd 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -295,7 +295,7 @@ xfs_allocbt_verify( case cpu_to_be32(XFS_ABTB_CRC_MAGIC): if (!xfs_sb_version_hascrc(&mp->m_sb)) return false; - if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) + if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) return false; if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) return false; @@ -313,7 +313,7 @@ xfs_allocbt_verify( case cpu_to_be32(XFS_ABTC_CRC_MAGIC): if (!xfs_sb_version_hascrc(&mp->m_sb)) return false; - if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) + if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) return false; if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) return false; diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 3349c9a1e..ff0655789 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -139,6 +139,8 @@ xfs_attr_get( args.value = value; args.valuelen = *valuelenp; + /* Entirely possible to look up a name which doesn't exist */ + args.op_flags = XFS_DA_OP_OKNOENT; lock_mode = xfs_ilock_attr_map_shared(ip); if (!xfs_inode_hasattr(ip)) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index e9d401ce9..33df52d97 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -262,7 +262,7 @@ xfs_attr3_leaf_verify( if (ichdr.magic != XFS_ATTR3_LEAF_MAGIC) return false; - if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid)) + if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid)) return false; if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) return false; @@ -1056,7 +1056,7 @@ xfs_attr3_leaf_create( hdr3->blkno = cpu_to_be64(bp->b_bn); hdr3->owner = cpu_to_be64(dp->i_ino); - uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid); + uuid_copy(&hdr3->uuid, &mp->m_sb.sb_meta_uuid); ichdr.freemap[0].base = sizeof(struct xfs_attr3_leaf_hdr); } else { diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index dd714037c..f38f9bd81 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -100,7 +100,7 @@ xfs_attr3_rmt_verify( return false; if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC)) return false; - if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid)) + if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid)) return false; if (be64_to_cpu(rmt->rm_blkno) != bno) return false; @@ -222,7 +222,7 @@ xfs_attr3_rmt_hdr_set( rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC); rmt->rm_offset = cpu_to_be32(offset); rmt->rm_bytes = cpu_to_be32(size); - uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid); + uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid); rmt->rm_owner = cpu_to_be64(ino); rmt->rm_blkno = cpu_to_be64(bno); @@ -618,9 +618,8 @@ xfs_attr_rmtval_remove( xfs_bmap_init(args->flist, args->firstblock); error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, - XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, - 1, args->firstblock, args->flist, - &done); + XFS_BMAPI_ATTRFORK, 1, args->firstblock, + args->flist, &done); if (!error) { error = xfs_bmap_finish(&args->trans, args->flist, &committed); diff --git a/fs/xfs/libxfs/xfs_bit.c b/fs/xfs/libxfs/xfs_bit.c new file mode 100644 index 000000000..0e8885a59 --- /dev/null +++ b/fs/xfs/libxfs/xfs_bit.c @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_log_format.h" +#include "xfs_bit.h" + +/* + * XFS bit manipulation routines, used in non-realtime code. + */ + +/* + * Return whether bitmap is empty. + * Size is number of words in the bitmap, which is padded to word boundary + * Returns 1 for empty, 0 for non-empty. + */ +int +xfs_bitmap_empty(uint *map, uint size) +{ + uint i; + uint ret = 0; + + for (i = 0; i < size; i++) { + ret |= map[i]; + } + + return (ret == 0); +} + +/* + * Count the number of contiguous bits set in the bitmap starting with bit + * start_bit. Size is the size of the bitmap in words. + */ +int +xfs_contig_bits(uint *map, uint size, uint start_bit) +{ + uint * p = ((unsigned int *) map) + (start_bit >> BIT_TO_WORD_SHIFT); + uint result = 0; + uint tmp; + + size <<= BIT_TO_WORD_SHIFT; + + ASSERT(start_bit < size); + size -= start_bit & ~(NBWORD - 1); + start_bit &= (NBWORD - 1); + if (start_bit) { + tmp = *p++; + /* set to one first offset bits prior to start */ + tmp |= (~0U >> (NBWORD-start_bit)); + if (tmp != ~0U) + goto found; + result += NBWORD; + size -= NBWORD; + } + while (size) { + if ((tmp = *p++) != ~0U) + goto found; + result += NBWORD; + size -= NBWORD; + } + return result - start_bit; +found: + return result + ffz(tmp) - start_bit; +} + +/* + * This takes the bit number to start looking from and + * returns the next set bit from there. It returns -1 + * if there are no more bits set or the start bit is + * beyond the end of the bitmap. + * + * Size is the number of words, not bytes, in the bitmap. + */ +int xfs_next_bit(uint *map, uint size, uint start_bit) +{ + uint * p = ((unsigned int *) map) + (start_bit >> BIT_TO_WORD_SHIFT); + uint result = start_bit & ~(NBWORD - 1); + uint tmp; + + size <<= BIT_TO_WORD_SHIFT; + + if (start_bit >= size) + return -1; + size -= result; + start_bit &= (NBWORD - 1); + if (start_bit) { + tmp = *p++; + /* set to zero first offset bits prior to start */ + tmp &= (~0U << start_bit); + if (tmp != 0U) + goto found; + result += NBWORD; + size -= NBWORD; + } + while (size) { + if ((tmp = *p++) != 0U) + goto found; + result += NBWORD; + size -= NBWORD; + } + return -1; +found: + return result + ffs(tmp) - 1; +} diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 63e05b663..8e2010d53 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5945,6 +5945,7 @@ xfs_bmap_split_extent( return xfs_trans_commit(tp); out: + xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp); return error; } diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 2c44c8e50..6b0cf6546 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -349,7 +349,8 @@ xfs_bmbt_to_bmdr( if (xfs_sb_version_hascrc(&mp->m_sb)) { ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_CRC_MAGIC)); - ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid)); + ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, + &mp->m_sb.sb_meta_uuid)); ASSERT(rblock->bb_u.l.bb_blkno == cpu_to_be64(XFS_BUF_DADDR_NULL)); } else @@ -647,7 +648,7 @@ xfs_bmbt_verify( case cpu_to_be32(XFS_BMAP_CRC_MAGIC): if (!xfs_sb_version_hascrc(&mp->m_sb)) return false; - if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid)) + if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) return false; if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn) return false; diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index c72283dd8..f7d7ee7a2 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -65,7 +65,8 @@ xfs_btree_check_lblock( if (xfs_sb_version_hascrc(&mp->m_sb)) { lblock_ok = lblock_ok && - uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid) && + uuid_equal(&block->bb_u.l.bb_uuid, + &mp->m_sb.sb_meta_uuid) && block->bb_u.l.bb_blkno == cpu_to_be64( bp ? bp->b_bn : XFS_BUF_DADDR_NULL); } @@ -115,7 +116,8 @@ xfs_btree_check_sblock( if (xfs_sb_version_hascrc(&mp->m_sb)) { sblock_ok = sblock_ok && - uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid) && + uuid_equal(&block->bb_u.s.bb_uuid, + &mp->m_sb.sb_meta_uuid) && block->bb_u.s.bb_blkno == cpu_to_be64( bp ? bp->b_bn : XFS_BUF_DADDR_NULL); } @@ -1000,7 +1002,7 @@ xfs_btree_init_block_int( if (flags & XFS_BTREE_CRC_BLOCKS) { buf->bb_u.l.bb_blkno = cpu_to_be64(blkno); buf->bb_u.l.bb_owner = cpu_to_be64(owner); - uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid); + uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid); buf->bb_u.l.bb_pad = 0; buf->bb_u.l.bb_lsn = 0; } @@ -1013,7 +1015,7 @@ xfs_btree_init_block_int( if (flags & XFS_BTREE_CRC_BLOCKS) { buf->bb_u.s.bb_blkno = cpu_to_be64(blkno); buf->bb_u.s.bb_owner = cpu_to_be32(__owner); - uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid); + uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid); buf->bb_u.s.bb_lsn = 0; } } diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 2385f8cd0..be43248a5 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -146,7 +146,7 @@ xfs_da3_node_verify( if (ichdr.magic != XFS_DA3_NODE_MAGIC) return false; - if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid)) + if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid)) return false; if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) return false; @@ -233,6 +233,7 @@ xfs_da3_node_read_verify( bp->b_ops->verify_read(bp); return; default: + xfs_buf_ioerror(bp, -EFSCORRUPTED); break; } @@ -324,7 +325,7 @@ xfs_da3_node_create( ichdr.magic = XFS_DA3_NODE_MAGIC; hdr3->info.blkno = cpu_to_be64(bp->b_bn); hdr3->info.owner = cpu_to_be64(args->dp->i_ino); - uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_uuid); + uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid); } else { ichdr.magic = XFS_DA_NODE_MAGIC; } @@ -1822,6 +1823,7 @@ xfs_da3_path_shift( struct xfs_da_args *args; struct xfs_da_node_entry *btree; struct xfs_da3_icnode_hdr nodehdr; + struct xfs_buf *bp; xfs_dablk_t blkno = 0; int level; int error; @@ -1866,20 +1868,24 @@ xfs_da3_path_shift( */ for (blk++, level++; level < path->active; blk++, level++) { /* - * Release the old block. - * (if it's dirty, trans won't actually let go) + * Read the next child block into a local buffer. */ - if (release) - xfs_trans_brelse(args->trans, blk->bp); + error = xfs_da3_node_read(args->trans, dp, blkno, -1, &bp, + args->whichfork); + if (error) + return error; /* - * Read the next child block. + * Release the old block (if it's dirty, the trans doesn't + * actually let go) and swap the local buffer into the path + * structure. This ensures failure of the above read doesn't set + * a NULL buffer in an active slot in the path. */ + if (release) + xfs_trans_brelse(args->trans, blk->bp); blk->blkno = blkno; - error = xfs_da3_node_read(args->trans, dp, blkno, -1, - &blk->bp, args->whichfork); - if (error) - return error; + blk->bp = bp; + info = blk->bp->b_addr; ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) || @@ -2351,8 +2357,8 @@ xfs_da_shrink_inode( * the last block to the place we want to kill. */ error = xfs_bunmapi(tp, dp, dead_blkno, count, - xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, - 0, args->firstblock, args->flist, &done); + xfs_bmapi_aflag(w), 0, args->firstblock, + args->flist, &done); if (error == -ENOSPC) { if (w != XFS_DATA_FORK) break; diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index a69fb3a1e..9de401d29 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -362,6 +362,7 @@ xfs_dir_lookup( struct xfs_da_args *args; int rval; int v; /* type-checking value */ + int lock_mode; ASSERT(S_ISDIR(dp->i_d.di_mode)); XFS_STATS_INC(xs_dir_lookup); @@ -387,6 +388,7 @@ xfs_dir_lookup( if (ci_name) args->op_flags |= XFS_DA_OP_CILOOKUP; + lock_mode = xfs_ilock_data_map_shared(dp); if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { rval = xfs_dir2_sf_lookup(args); goto out_check_rval; @@ -419,6 +421,7 @@ out_check_rval: } } out_free: + xfs_iunlock(dp, lock_mode); kmem_free(args); return rval; } @@ -674,25 +677,22 @@ xfs_dir2_shrink_inode( mp = dp->i_mount; tp = args->trans; da = xfs_dir2_db_to_da(args->geo, db); - /* - * Unmap the fsblock(s). - */ - if ((error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, - XFS_BMAPI_METADATA, 0, args->firstblock, args->flist, - &done))) { + + /* Unmap the fsblock(s). */ + error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, 0, 0, + args->firstblock, args->flist, &done); + if (error) { /* - * ENOSPC actually can happen if we're in a removename with - * no space reservation, and the resulting block removal - * would cause a bmap btree split or conversion from extents - * to btree. This can only happen for un-fragmented - * directory blocks, since you need to be punching out - * the middle of an extent. - * In this case we need to leave the block in the file, - * and not binval it. - * So the block has to be in a consistent empty state - * and appropriately logged. - * We don't free up the buffer, the caller can tell it - * hasn't happened since it got an error back. + * ENOSPC actually can happen if we're in a removename with no + * space reservation, and the resulting block removal would + * cause a bmap btree split or conversion from extents to btree. + * This can only happen for un-fragmented directory blocks, + * since you need to be punching out the middle of an extent. + * In this case we need to leave the block in the file, and not + * binval it. So the block has to be in a consistent empty + * state and appropriately logged. We don't free up the buffer, + * the caller can tell it hasn't happened since it got an error + * back. */ return error; } diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index 9354e190b..4778d1dd5 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -67,7 +67,7 @@ xfs_dir3_block_verify( if (xfs_sb_version_hascrc(&mp->m_sb)) { if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) return false; - if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid)) + if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) return false; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) return false; @@ -157,7 +157,7 @@ xfs_dir3_block_init( hdr3->magic = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC); hdr3->blkno = cpu_to_be64(bp->b_bn); hdr3->owner = cpu_to_be64(dp->i_ino); - uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid); + uuid_copy(&hdr3->uuid, &mp->m_sb.sb_meta_uuid); return; } diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 534bbf283..824131e71 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -220,7 +220,7 @@ xfs_dir3_data_verify( if (xfs_sb_version_hascrc(&mp->m_sb)) { if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC)) return false; - if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid)) + if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) return false; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) return false; @@ -605,7 +605,7 @@ xfs_dir3_data_init( hdr3->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC); hdr3->blkno = cpu_to_be64(bp->b_bn); hdr3->owner = cpu_to_be64(dp->i_ino); - uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid); + uuid_copy(&hdr3->uuid, &mp->m_sb.sb_meta_uuid); } else hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index 106119955..f300240eb 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -160,7 +160,7 @@ xfs_dir3_leaf_verify( if (leaf3->info.hdr.magic != cpu_to_be16(magic3)) return false; - if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_uuid)) + if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid)) return false; if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) return false; @@ -310,7 +310,7 @@ xfs_dir3_leaf_init( : cpu_to_be16(XFS_DIR3_LEAFN_MAGIC); leaf3->info.blkno = cpu_to_be64(bp->b_bn); leaf3->info.owner = cpu_to_be64(owner); - uuid_copy(&leaf3->info.uuid, &mp->m_sb.sb_uuid); + uuid_copy(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid); } else { memset(leaf, 0, sizeof(*leaf)); leaf->hdr.info.magic = cpu_to_be16(type); diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 06bb4218b..cc28e9245 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -93,7 +93,7 @@ xfs_dir3_free_verify( if (hdr3->magic != cpu_to_be32(XFS_DIR3_FREE_MAGIC)) return false; - if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid)) + if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) return false; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) return false; @@ -226,7 +226,7 @@ xfs_dir3_free_get_buf( hdr3->hdr.blkno = cpu_to_be64(bp->b_bn); hdr3->hdr.owner = cpu_to_be64(dp->i_ino); - uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_uuid); + uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_meta_uuid); } else hdr.magic = XFS_DIR2_FREE_MAGIC; dp->d_ops->free_hdr_to_disk(bp->b_addr, &hdr); @@ -1845,8 +1845,7 @@ xfs_dir2_node_addname_int( if (dp->d_ops->db_to_fdb(args->geo, dbno) != fbno) { xfs_alert(mp, - "%s: dir ino %llu needed freesp block %lld for\n" - " data block %lld, got %lld ifbno %llu lastfbno %d", +"%s: dir ino %llu needed freesp block %lld for data block %lld, got %lld ifbno %llu lastfbno %d", __func__, (unsigned long long)dp->i_ino, (long long)dp->d_ops->db_to_fdb( args->geo, dbno), diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index 6fbf2d853..5331b7f04 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -163,7 +163,7 @@ xfs_dqcheck( d->dd_diskdq.d_id = cpu_to_be32(id); if (xfs_sb_version_hascrc(&mp->m_sb)) { - uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid); + uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid); xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk), XFS_DQUOT_CRC_OFF); } @@ -198,7 +198,7 @@ xfs_dquot_buf_verify_crc( if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk), XFS_DQUOT_CRC_OFF)) return false; - if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid)) + if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_meta_uuid)) return false; } return true; diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index a0ae57205..9590a069e 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -100,7 +100,7 @@ typedef struct xfs_sb { xfs_rfsblock_t sb_dblocks; /* number of data blocks */ xfs_rfsblock_t sb_rblocks; /* number of realtime blocks */ xfs_rtblock_t sb_rextents; /* number of realtime extents */ - uuid_t sb_uuid; /* file system unique id */ + uuid_t sb_uuid; /* user-visible file system unique id */ xfs_fsblock_t sb_logstart; /* starting block of log if internal */ xfs_ino_t sb_rootino; /* root inode number */ xfs_ino_t sb_rbmino; /* bitmap inode for realtime extents */ @@ -174,6 +174,7 @@ typedef struct xfs_sb { xfs_ino_t sb_pquotino; /* project quota inode */ xfs_lsn_t sb_lsn; /* last write sequence */ + uuid_t sb_meta_uuid; /* metadata file system unique id */ /* must be padded to 64 bit alignment */ } xfs_sb_t; @@ -190,7 +191,7 @@ typedef struct xfs_dsb { __be64 sb_dblocks; /* number of data blocks */ __be64 sb_rblocks; /* number of realtime blocks */ __be64 sb_rextents; /* number of realtime extents */ - uuid_t sb_uuid; /* file system unique id */ + uuid_t sb_uuid; /* user-visible file system unique id */ __be64 sb_logstart; /* starting block of log if internal */ __be64 sb_rootino; /* root inode number */ __be64 sb_rbmino; /* bitmap inode for realtime extents */ @@ -260,6 +261,7 @@ typedef struct xfs_dsb { __be64 sb_pquotino; /* project quota inode */ __be64 sb_lsn; /* last write sequence */ + uuid_t sb_meta_uuid; /* metadata file system unique id */ /* must be padded to 64 bit alignment */ } xfs_dsb_t; @@ -458,9 +460,11 @@ xfs_sb_has_ro_compat_feature( #define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */ #define XFS_SB_FEAT_INCOMPAT_SPINODES (1 << 1) /* sparse inode chunks */ +#define XFS_SB_FEAT_INCOMPAT_META_UUID (1 << 2) /* metadata UUID */ #define XFS_SB_FEAT_INCOMPAT_ALL \ (XFS_SB_FEAT_INCOMPAT_FTYPE| \ - XFS_SB_FEAT_INCOMPAT_SPINODES) + XFS_SB_FEAT_INCOMPAT_SPINODES| \ + XFS_SB_FEAT_INCOMPAT_META_UUID) #define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL static inline bool @@ -514,6 +518,18 @@ static inline bool xfs_sb_version_hassparseinodes(struct xfs_sb *sbp) xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_SPINODES); } +/* + * XFS_SB_FEAT_INCOMPAT_META_UUID indicates that the metadata UUID + * is stored separately from the user-visible UUID; this allows the + * user-visible UUID to be changed on V5 filesystems which have a + * filesystem UUID stamped into every piece of metadata. + */ +static inline bool xfs_sb_version_hasmetauuid(struct xfs_sb *sbp) +{ + return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) && + (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID); +} + /* * end of superblock version macros */ diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 66efc7024..54deb2d12 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -338,7 +338,8 @@ xfs_ialloc_inode_init( if (version == 3) { free->di_ino = cpu_to_be64(ino); ino++; - uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid); + uuid_copy(&free->di_uuid, + &mp->m_sb.sb_meta_uuid); xfs_dinode_calc_crc(mp, free); } else if (tp) { /* just log the inode core */ @@ -2232,7 +2233,7 @@ xfs_imap_lookup( } xfs_trans_brelse(tp, agbp); - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); if (error) return error; @@ -2500,7 +2501,7 @@ xfs_agi_verify( struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); if (xfs_sb_version_hascrc(&mp->m_sb) && - !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_uuid)) + !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) return false; /* * Validate the magic number of the agi block. diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 674ad8f76..f39b285be 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -239,7 +239,7 @@ xfs_inobt_verify( case cpu_to_be32(XFS_FIBT_CRC_MAGIC): if (!xfs_sb_version_hascrc(&mp->m_sb)) return false; - if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) + if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) return false; if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) return false; diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 6526e7696..268c00f4f 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -304,7 +304,7 @@ xfs_dinode_verify( return false; if (be64_to_cpu(dip->di_ino) != ip->i_ino) return false; - if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid)) + if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) return false; return true; } @@ -366,7 +366,7 @@ xfs_iread( if (xfs_sb_version_hascrc(&mp->m_sb)) { ip->i_d.di_version = 3; ip->i_d.di_ino = ip->i_ino; - uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid); + uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_meta_uuid); } else ip->i_d.di_version = 2; return 0; diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index df9851c46..47425140f 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -131,10 +131,11 @@ xfs_mount_validate_sb( if (xfs_sb_has_compat_feature(sbp, XFS_SB_FEAT_COMPAT_UNKNOWN)) { xfs_warn(mp, -"Superblock has unknown compatible features (0x%x) enabled.\n" -"Using a more recent kernel is recommended.", +"Superblock has unknown compatible features (0x%x) enabled.", (sbp->sb_features_compat & XFS_SB_FEAT_COMPAT_UNKNOWN)); + xfs_warn(mp, +"Using a more recent kernel is recommended."); } if (xfs_sb_has_ro_compat_feature(sbp, @@ -145,18 +146,21 @@ xfs_mount_validate_sb( XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { xfs_warn(mp, -"Attempted to mount read-only compatible filesystem read-write.\n" +"Attempted to mount read-only compatible filesystem read-write."); + xfs_warn(mp, "Filesystem can only be safely mounted read only."); + return -EINVAL; } } if (xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_UNKNOWN)) { xfs_warn(mp, -"Superblock has unknown incompatible features (0x%x) enabled.\n" -"Filesystem can not be safely mounted by this kernel.", +"Superblock has unknown incompatible features (0x%x) enabled.", (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_UNKNOWN)); + xfs_warn(mp, +"Filesystem can not be safely mounted by this kernel."); return -EINVAL; } } @@ -182,9 +186,6 @@ xfs_mount_validate_sb( if (xfs_sb_version_hassparseinodes(sbp)) { uint32_t align; - xfs_alert(mp, - "EXPERIMENTAL sparse inode feature enabled. Use at your own risk!"); - align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize >> sbp->sb_blocklog; if (sbp->sb_inoalignmt != align) { @@ -398,6 +399,14 @@ __xfs_sb_from_disk( to->sb_spino_align = be32_to_cpu(from->sb_spino_align); to->sb_pquotino = be64_to_cpu(from->sb_pquotino); to->sb_lsn = be64_to_cpu(from->sb_lsn); + /* + * sb_meta_uuid is only on disk if it differs from sb_uuid and the + * feature flag is set; if not set we keep it only in memory. + */ + if (xfs_sb_version_hasmetauuid(to)) + uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid); + else + uuid_copy(&to->sb_meta_uuid, &from->sb_uuid); /* Convert on-disk flags to in-memory flags? */ if (convert_xquota) xfs_sb_quota_from_disk(to); @@ -539,6 +548,8 @@ xfs_sb_to_disk( cpu_to_be32(from->sb_features_log_incompat); to->sb_spino_align = cpu_to_be32(from->sb_spino_align); to->sb_lsn = cpu_to_be64(from->sb_lsn); + if (xfs_sb_version_hasmetauuid(from)) + uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid); } } diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index e7e26bd64..8f8af05b3 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -63,7 +63,7 @@ xfs_symlink_hdr_set( dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC); dsl->sl_offset = cpu_to_be32(offset); dsl->sl_bytes = cpu_to_be32(size); - uuid_copy(&dsl->sl_uuid, &mp->m_sb.sb_uuid); + uuid_copy(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid); dsl->sl_owner = cpu_to_be64(ino); dsl->sl_blkno = cpu_to_be64(bp->b_bn); bp->b_ops = &xfs_symlink_buf_ops; @@ -107,7 +107,7 @@ xfs_symlink_verify( return false; if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC)) return false; - if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_uuid)) + if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid)) return false; if (bp->b_bn != be64_to_cpu(dsl->sl_blkno)) return false; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 458fced2c..50ab2879b 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -119,8 +119,7 @@ xfs_setfilesize_trans_alloc( * We may pass freeze protection with a transaction. So tell lockdep * we released it. */ - rwsem_release(&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], - 1, _THIS_IP_); + __sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS); /* * We hand off the transaction to the completion thread now, so * clear the flag here. @@ -171,8 +170,7 @@ xfs_setfilesize_ioend( * Similarly for freeze protection. */ current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); - rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], - 0, 1, _THIS_IP_); + __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); } @@ -351,13 +349,12 @@ xfs_imap_valid( */ STATIC void xfs_end_bio( - struct bio *bio, - int error) + struct bio *bio) { xfs_ioend_t *ioend = bio->bi_private; - if (!ioend->io_error && !test_bit(BIO_UPTODATE, &bio->bi_flags)) - ioend->io_error = error; + if (!ioend->io_error) + ioend->io_error = bio->bi_error; /* Toss bio and pass work off to an xfsdatad thread */ bio->bi_private = NULL; @@ -383,8 +380,7 @@ STATIC struct bio * xfs_alloc_ioend_bio( struct buffer_head *bh) { - int nvecs = bio_get_nr_vecs(bh->b_bdev); - struct bio *bio = bio_alloc(GFP_NOIO, nvecs); + struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); ASSERT(bio->bi_private == NULL); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c deleted file mode 100644 index 0e8885a59..000000000 --- a/fs/xfs/xfs_bit.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_log_format.h" -#include "xfs_bit.h" - -/* - * XFS bit manipulation routines, used in non-realtime code. - */ - -/* - * Return whether bitmap is empty. - * Size is number of words in the bitmap, which is padded to word boundary - * Returns 1 for empty, 0 for non-empty. - */ -int -xfs_bitmap_empty(uint *map, uint size) -{ - uint i; - uint ret = 0; - - for (i = 0; i < size; i++) { - ret |= map[i]; - } - - return (ret == 0); -} - -/* - * Count the number of contiguous bits set in the bitmap starting with bit - * start_bit. Size is the size of the bitmap in words. - */ -int -xfs_contig_bits(uint *map, uint size, uint start_bit) -{ - uint * p = ((unsigned int *) map) + (start_bit >> BIT_TO_WORD_SHIFT); - uint result = 0; - uint tmp; - - size <<= BIT_TO_WORD_SHIFT; - - ASSERT(start_bit < size); - size -= start_bit & ~(NBWORD - 1); - start_bit &= (NBWORD - 1); - if (start_bit) { - tmp = *p++; - /* set to one first offset bits prior to start */ - tmp |= (~0U >> (NBWORD-start_bit)); - if (tmp != ~0U) - goto found; - result += NBWORD; - size -= NBWORD; - } - while (size) { - if ((tmp = *p++) != ~0U) - goto found; - result += NBWORD; - size -= NBWORD; - } - return result - start_bit; -found: - return result + ffz(tmp) - start_bit; -} - -/* - * This takes the bit number to start looking from and - * returns the next set bit from there. It returns -1 - * if there are no more bits set or the start bit is - * beyond the end of the bitmap. - * - * Size is the number of words, not bytes, in the bitmap. - */ -int xfs_next_bit(uint *map, uint size, uint start_bit) -{ - uint * p = ((unsigned int *) map) + (start_bit >> BIT_TO_WORD_SHIFT); - uint result = start_bit & ~(NBWORD - 1); - uint tmp; - - size <<= BIT_TO_WORD_SHIFT; - - if (start_bit >= size) - return -1; - size -= result; - start_bit &= (NBWORD - 1); - if (start_bit) { - tmp = *p++; - /* set to zero first offset bits prior to start */ - tmp &= (~0U << start_bit); - if (tmp != 0U) - goto found; - result += NBWORD; - size -= NBWORD; - } - while (size) { - if ((tmp = *p++) != 0U) - goto found; - result += NBWORD; - size -= NBWORD; - } - return -1; -found: - return result + ffs(tmp) - 1; -} diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 0f34886cf..3bf4ad0d1 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -67,16 +67,15 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) */ int /* error */ xfs_bmap_finish( - xfs_trans_t **tp, /* transaction pointer addr */ - xfs_bmap_free_t *flist, /* i/o: list extents to free */ - int *committed) /* xact committed or not */ + struct xfs_trans **tp, /* transaction pointer addr */ + struct xfs_bmap_free *flist, /* i/o: list extents to free */ + int *committed)/* xact committed or not */ { - xfs_efd_log_item_t *efd; /* extent free data */ - xfs_efi_log_item_t *efi; /* extent free intention */ - int error; /* error return value */ - xfs_bmap_free_item_t *free; /* free extent item */ - xfs_mount_t *mp; /* filesystem mount structure */ - xfs_bmap_free_item_t *next; /* next item on free list */ + struct xfs_efd_log_item *efd; /* extent free data */ + struct xfs_efi_log_item *efi; /* extent free intention */ + int error; /* error return value */ + struct xfs_bmap_free_item *free; /* free extent item */ + struct xfs_bmap_free_item *next; /* next item on free list */ ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); if (flist->xbf_count == 0) { @@ -88,40 +87,48 @@ xfs_bmap_finish( xfs_trans_log_efi_extent(*tp, efi, free->xbfi_startblock, free->xbfi_blockcount); - error = xfs_trans_roll(tp, NULL); - *committed = 1; - /* - * We have a new transaction, so we should return committed=1, - * even though we're returning an error. - */ - if (error) + error = __xfs_trans_roll(tp, NULL, committed); + if (error) { + /* + * If the transaction was committed, drop the EFD reference + * since we're bailing out of here. The other reference is + * dropped when the EFI hits the AIL. + * + * If the transaction was not committed, the EFI is freed by the + * EFI item unlock handler on abort. Also, we have a new + * transaction so we should return committed=1 even though we're + * returning an error. + */ + if (*committed) { + xfs_efi_release(efi); + xfs_force_shutdown((*tp)->t_mountp, + (error == -EFSCORRUPTED) ? + SHUTDOWN_CORRUPT_INCORE : + SHUTDOWN_META_IO_ERROR); + } else { + *committed = 1; + } + return error; + } + /* + * Get an EFD and free each extent in the list, logging to the EFD in + * the process. The remaining bmap free list is cleaned up by the caller + * on error. + */ efd = xfs_trans_get_efd(*tp, efi, flist->xbf_count); for (free = flist->xbf_first; free != NULL; free = next) { next = free->xbfi_next; - if ((error = xfs_free_extent(*tp, free->xbfi_startblock, - free->xbfi_blockcount))) { - /* - * The bmap free list will be cleaned up at a - * higher level. The EFI will be canceled when - * this transaction is aborted. - * Need to force shutdown here to make sure it - * happens, since this transaction may not be - * dirty yet. - */ - mp = (*tp)->t_mountp; - if (!XFS_FORCED_SHUTDOWN(mp)) - xfs_force_shutdown(mp, - (error == -EFSCORRUPTED) ? - SHUTDOWN_CORRUPT_INCORE : - SHUTDOWN_META_IO_ERROR); + + error = xfs_trans_free_extent(*tp, efd, free->xbfi_startblock, + free->xbfi_blockcount); + if (error) return error; - } - xfs_trans_log_efd_extent(*tp, efd, free->xbfi_startblock, - free->xbfi_blockcount); + xfs_bmap_del_free(flist, NULL, free); } + return 0; } @@ -1467,7 +1474,7 @@ xfs_shift_file_space( XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, XFS_QMOPT_RES_REGBLKS); if (error) - goto out; + goto out_trans_cancel; xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); @@ -1481,18 +1488,20 @@ xfs_shift_file_space( &done, stop_fsb, &first_block, &free_list, direction, XFS_BMAP_MAX_SHIFT_EXTENTS); if (error) - goto out; + goto out_bmap_cancel; error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) - goto out; + goto out_bmap_cancel; error = xfs_trans_commit(tp); } return error; -out: +out_bmap_cancel: + xfs_bmap_cancel(&free_list); +out_trans_cancel: xfs_trans_cancel(tp); return error; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index a4b7d92e9..8ecffb359 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -438,7 +438,6 @@ _xfs_buf_find( xfs_buf_flags_t flags, xfs_buf_t *new_bp) { - size_t numbytes; struct xfs_perag *pag; struct rb_node **rbp; struct rb_node *parent; @@ -450,10 +449,9 @@ _xfs_buf_find( for (i = 0; i < nmaps; i++) numblks += map[i].bm_len; - numbytes = BBTOB(numblks); /* Check for IOs smaller than the sector size / not sector aligned */ - ASSERT(!(numbytes < btp->bt_meta_sectorsize)); + ASSERT(!(BBTOB(numblks) < btp->bt_meta_sectorsize)); ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_meta_sectormask)); /* @@ -1096,8 +1094,7 @@ xfs_bwrite( STATIC void xfs_buf_bio_end_io( - struct bio *bio, - int error) + struct bio *bio) { xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; @@ -1105,10 +1102,10 @@ xfs_buf_bio_end_io( * don't overwrite existing errors - otherwise we can lose errors on * buffers that require multiple bios to complete. */ - if (error) { + if (bio->bi_error) { spin_lock(&bp->b_lock); if (!bp->b_io_error) - bp->b_io_error = error; + bp->b_io_error = bio->bi_error; spin_unlock(&bp->b_lock); } @@ -1533,9 +1530,10 @@ xfs_wait_buftarg( list_del_init(&bp->b_lru); if (bp->b_flags & XBF_WRITE_FAIL) { xfs_alert(btp->bt_mount, -"Corruption Alert: Buffer at block 0x%llx had permanent write failures!\n" -"Please run xfs_repair to determine the extent of the problem.", +"Corruption Alert: Buffer at block 0x%llx had permanent write failures!", (long long)bp->b_bn); + xfs_alert(btp->bt_mount, +"Please run xfs_repair to determine the extent of the problem."); } xfs_buf_rele(bp); } diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 331c1ccf8..c79b717d9 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 092d652bc..7e986da34 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -647,11 +647,7 @@ xfs_buf_item_unlock( xfs_buf_item_relse(bp); else if (aborted) { ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp)); - if (lip->li_flags & XFS_LI_IN_AIL) { - spin_lock(&lip->li_ailp->xa_lock); - xfs_trans_ail_delete(lip->li_ailp, lip, - SHUTDOWN_LOG_IO_ERROR); - } + xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR); xfs_buf_item_relse(bp); } } @@ -750,13 +746,13 @@ xfs_buf_item_free_format( * buffer (see xfs_buf_attach_iodone() below), then put the * buf log item at the front. */ -void +int xfs_buf_item_init( - xfs_buf_t *bp, - xfs_mount_t *mp) + struct xfs_buf *bp, + struct xfs_mount *mp) { - xfs_log_item_t *lip = bp->b_fspriv; - xfs_buf_log_item_t *bip; + struct xfs_log_item *lip = bp->b_fspriv; + struct xfs_buf_log_item *bip; int chunks; int map_size; int error; @@ -770,12 +766,11 @@ xfs_buf_item_init( */ ASSERT(bp->b_target->bt_mount == mp); if (lip != NULL && lip->li_type == XFS_LI_BUF) - return; + return 0; bip = kmem_zone_zalloc(xfs_buf_item_zone, KM_SLEEP); xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops); bip->bli_buf = bp; - xfs_buf_hold(bp); /* * chunks is the number of XFS_BLF_CHUNK size pieces the buffer @@ -788,6 +783,11 @@ xfs_buf_item_init( */ error = xfs_buf_item_get_format(bip, bp->b_map_count); ASSERT(error == 0); + if (error) { /* to stop gcc throwing set-but-unused warnings */ + kmem_zone_free(xfs_buf_item_zone, bip); + return error; + } + for (i = 0; i < bip->bli_format_count; i++) { chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len), @@ -807,6 +807,8 @@ xfs_buf_item_init( if (bp->b_fspriv) bip->bli_item.li_bio_list = bp->b_fspriv; bp->b_fspriv = bip; + xfs_buf_hold(bp); + return 0; } diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 3f3455a41..f7eba99d1 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -61,7 +61,7 @@ typedef struct xfs_buf_log_item { struct xfs_buf_log_format __bli_format; /* embedded in-log header */ } xfs_buf_log_item_t; -void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); +int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); void xfs_buf_item_relse(struct xfs_buf *); void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint); uint xfs_buf_item_dirty(xfs_buf_log_item_t *); diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 098cd78fe..a989a9c7e 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -171,6 +171,7 @@ xfs_dir2_block_getdents( int wantoff; /* starting block offset */ xfs_off_t cook; struct xfs_da_geometry *geo = args->geo; + int lock_mode; /* * If the block number in the offset is out of range, we're done. @@ -178,7 +179,9 @@ xfs_dir2_block_getdents( if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk) return 0; + lock_mode = xfs_ilock_data_map_shared(dp); error = xfs_dir3_block_read(NULL, dp, &bp); + xfs_iunlock(dp, lock_mode); if (error) return error; @@ -529,9 +532,12 @@ xfs_dir2_leaf_getdents( * current buffer, need to get another one. */ if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) { + int lock_mode; + lock_mode = xfs_ilock_data_map_shared(dp); error = xfs_dir2_leaf_readbuf(args, bufsize, map_info, &curoff, &bp); + xfs_iunlock(dp, lock_mode); if (error || !map_info->map_valid) break; @@ -653,7 +659,6 @@ xfs_readdir( struct xfs_da_args args = { NULL }; int rval; int v; - uint lock_mode; trace_xfs_readdir(dp); @@ -666,7 +671,7 @@ xfs_readdir( args.dp = dp; args.geo = dp->i_mount->m_dir_geo; - lock_mode = xfs_ilock_data_map_shared(dp); + xfs_ilock(dp, XFS_IOLOCK_SHARED); if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) rval = xfs_dir2_sf_getdents(&args, ctx); else if ((rval = xfs_dir2_isblock(&args, &v))) @@ -675,7 +680,7 @@ xfs_readdir( rval = xfs_dir2_block_getdents(&args, ctx); else rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize); - xfs_iunlock(dp, lock_mode); + xfs_iunlock(dp, XFS_IOLOCK_SHARED); return rval; } diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 4143dc75d..30cb3afb6 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -251,7 +251,7 @@ xfs_qm_init_dquot_blk( d->dd_diskdq.d_id = cpu_to_be32(curid); d->dd_diskdq.d_flags = type; if (xfs_sb_version_hascrc(&mp->m_sb)) { - uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid); + uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid); xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk), XFS_DQUOT_CRC_OFF); } @@ -954,12 +954,8 @@ xfs_qm_dqflush( struct xfs_log_item *lip = &dqp->q_logitem.qli_item; dqp->dq_flags &= ~XFS_DQ_DIRTY; - spin_lock(&mp->m_ail->xa_lock); - if (lip->li_flags & XFS_LI_IN_AIL) - xfs_trans_ail_delete(mp->m_ail, lip, - SHUTDOWN_CORRUPT_INCORE); - else - spin_unlock(&mp->m_ail->xa_lock); + xfs_trans_ail_remove(lip, SHUTDOWN_CORRUPT_INCORE); + error = -EIO; goto out_unlock; } diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index adc8f8fdd..4aa015321 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -46,28 +46,6 @@ xfs_efi_item_free( kmem_zone_free(xfs_efi_zone, efip); } -/* - * Freeing the efi requires that we remove it from the AIL if it has already - * been placed there. However, the EFI may not yet have been placed in the AIL - * when called by xfs_efi_release() from EFD processing due to the ordering of - * committed vs unpin operations in bulk insert operations. Hence the reference - * count to ensure only the last caller frees the EFI. - */ -STATIC void -__xfs_efi_release( - struct xfs_efi_log_item *efip) -{ - struct xfs_ail *ailp = efip->efi_item.li_ailp; - - if (atomic_dec_and_test(&efip->efi_refcount)) { - spin_lock(&ailp->xa_lock); - /* xfs_trans_ail_delete() drops the AIL lock. */ - xfs_trans_ail_delete(ailp, &efip->efi_item, - SHUTDOWN_LOG_IO_ERROR); - xfs_efi_item_free(efip); - } -} - /* * This returns the number of iovecs needed to log the given efi item. * We only need 1 iovec for an efi item. It just logs the efi_log_format @@ -128,12 +106,12 @@ xfs_efi_item_pin( } /* - * While EFIs cannot really be pinned, the unpin operation is the last place at - * which the EFI is manipulated during a transaction. If we are being asked to - * remove the EFI it's because the transaction has been cancelled and by - * definition that means the EFI cannot be in the AIL so remove it from the - * transaction and free it. Otherwise coordinate with xfs_efi_release() - * to determine who gets to free the EFI. + * The unpin operation is the last place an EFI is manipulated in the log. It is + * either inserted in the AIL or aborted in the event of a log I/O error. In + * either case, the EFI transaction has been successfully committed to make it + * this far. Therefore, we expect whoever committed the EFI to either construct + * and commit the EFD or drop the EFD's reference in the event of error. Simply + * drop the log's EFI reference now that the log is done with it. */ STATIC void xfs_efi_item_unpin( @@ -141,15 +119,7 @@ xfs_efi_item_unpin( int remove) { struct xfs_efi_log_item *efip = EFI_ITEM(lip); - - if (remove) { - ASSERT(!(lip->li_flags & XFS_LI_IN_AIL)); - if (lip->li_desc) - xfs_trans_del_item(lip); - xfs_efi_item_free(efip); - return; - } - __xfs_efi_release(efip); + xfs_efi_release(efip); } /* @@ -167,6 +137,11 @@ xfs_efi_item_push( return XFS_ITEM_PINNED; } +/* + * The EFI has been either committed or aborted if the transaction has been + * cancelled. If the transaction was cancelled, an EFD isn't going to be + * constructed and thus we free the EFI here directly. + */ STATIC void xfs_efi_item_unlock( struct xfs_log_item *lip) @@ -301,23 +276,19 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) } /* - * This is called by the efd item code below to release references to the given - * efi item. Each efd calls this with the number of extents that it has - * logged, and when the sum of these reaches the total number of extents logged - * by this efi item we can free the efi item. + * Freeing the efi requires that we remove it from the AIL if it has already + * been placed there. However, the EFI may not yet have been placed in the AIL + * when called by xfs_efi_release() from EFD processing due to the ordering of + * committed vs unpin operations in bulk insert operations. Hence the reference + * count to ensure only the last caller frees the EFI. */ void -xfs_efi_release(xfs_efi_log_item_t *efip, - uint nextents) +xfs_efi_release( + struct xfs_efi_log_item *efip) { - ASSERT(atomic_read(&efip->efi_next_extent) >= nextents); - if (atomic_sub_and_test(nextents, &efip->efi_next_extent)) { - /* recovery needs us to drop the EFI reference, too */ - if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) - __xfs_efi_release(efip); - - __xfs_efi_release(efip); - /* efip may now have been freed, do not reference it again. */ + if (atomic_dec_and_test(&efip->efi_refcount)) { + xfs_trans_ail_remove(&efip->efi_item, SHUTDOWN_LOG_IO_ERROR); + xfs_efi_item_free(efip); } } @@ -415,20 +386,27 @@ xfs_efd_item_push( return XFS_ITEM_PINNED; } +/* + * The EFD is either committed or aborted if the transaction is cancelled. If + * the transaction is cancelled, drop our reference to the EFI and free the EFD. + */ STATIC void xfs_efd_item_unlock( struct xfs_log_item *lip) { - if (lip->li_flags & XFS_LI_ABORTED) - xfs_efd_item_free(EFD_ITEM(lip)); + struct xfs_efd_log_item *efdp = EFD_ITEM(lip); + + if (lip->li_flags & XFS_LI_ABORTED) { + xfs_efi_release(efdp->efd_efip); + xfs_efd_item_free(efdp); + } } /* - * When the efd item is committed to disk, all we need to do - * is delete our reference to our partner efi item and then - * free ourselves. Since we're freeing ourselves we must - * return -1 to keep the transaction code from further referencing - * this item. + * When the efd item is committed to disk, all we need to do is delete our + * reference to our partner efi item and then free ourselves. Since we're + * freeing ourselves we must return -1 to keep the transaction code from further + * referencing this item. */ STATIC xfs_lsn_t xfs_efd_item_committed( @@ -438,13 +416,14 @@ xfs_efd_item_committed( struct xfs_efd_log_item *efdp = EFD_ITEM(lip); /* - * If we got a log I/O error, it's always the case that the LR with the - * EFI got unpinned and freed before the EFD got aborted. + * Drop the EFI reference regardless of whether the EFD has been + * aborted. Once the EFD transaction is constructed, it is the sole + * responsibility of the EFD to release the EFI (even if the EFI is + * aborted due to log I/O error). */ - if (!(lip->li_flags & XFS_LI_ABORTED)) - xfs_efi_release(efdp->efd_efip, efdp->efd_format.efd_nextents); - + xfs_efi_release(efdp->efd_efip); xfs_efd_item_free(efdp); + return (xfs_lsn_t)-1; } diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h index 0ffbce32d..8fa865170 100644 --- a/fs/xfs/xfs_extfree_item.h +++ b/fs/xfs/xfs_extfree_item.h @@ -39,9 +39,28 @@ struct kmem_zone; * "extent free done" log item described below. * * The EFI is reference counted so that it is not freed prior to both the EFI - * and EFD being committed and unpinned. This ensures that when the last - * reference goes away the EFI will always be in the AIL as it has been - * unpinned, regardless of whether the EFD is processed before or after the EFI. + * and EFD being committed and unpinned. This ensures the EFI is inserted into + * the AIL even in the event of out of order EFI/EFD processing. In other words, + * an EFI is born with two references: + * + * 1.) an EFI held reference to track EFI AIL insertion + * 2.) an EFD held reference to track EFD commit + * + * On allocation, both references are the responsibility of the caller. Once the + * EFI is added to and dirtied in a transaction, ownership of reference one + * transfers to the transaction. The reference is dropped once the EFI is + * inserted to the AIL or in the event of failure along the way (e.g., commit + * failure, log I/O error, etc.). Note that the caller remains responsible for + * the EFD reference under all circumstances to this point. The caller has no + * means to detect failure once the transaction is committed, however. + * Therefore, an EFD is required after this point, even in the event of + * unrelated failure. + * + * Once an EFD is allocated and dirtied in a transaction, reference two + * transfers to the transaction. The EFD reference is dropped once it reaches + * the unpin handler. Similar to the EFI, the reference also drops in the event + * of commit failure or log I/O errors. Note that the EFD is not inserted in the + * AIL, so at this point both the EFI and EFD are freed. */ typedef struct xfs_efi_log_item { xfs_log_item_t efi_item; @@ -77,5 +96,6 @@ xfs_efd_log_item_t *xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *, int xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt); void xfs_efi_item_free(xfs_efi_log_item_t *); +void xfs_efi_release(struct xfs_efi_log_item *); #endif /* __XFS_EXTFREE_ITEM_H__ */ diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index db4acc1c3..e78feb400 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -317,24 +317,33 @@ xfs_file_read_iter( return -EIO; /* - * Locking is a bit tricky here. If we take an exclusive lock - * for direct IO, we effectively serialise all new concurrent - * read IO to this file and block it behind IO that is currently in - * progress because IO in progress holds the IO lock shared. We only - * need to hold the lock exclusive to blow away the page cache, so - * only take lock exclusively if the page cache needs invalidation. - * This allows the normal direct IO case of no page cache pages to - * proceeed concurrently without serialisation. + * Locking is a bit tricky here. If we take an exclusive lock for direct + * IO, we effectively serialise all new concurrent read IO to this file + * and block it behind IO that is currently in progress because IO in + * progress holds the IO lock shared. We only need to hold the lock + * exclusive to blow away the page cache, so only take lock exclusively + * if the page cache needs invalidation. This allows the normal direct + * IO case of no page cache pages to proceeed concurrently without + * serialisation. */ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) { xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); + /* + * The generic dio code only flushes the range of the particular + * I/O. Because we take an exclusive lock here, this whole + * sequence is considerably more expensive for us. This has a + * noticeable performance impact for any file with cached pages, + * even when outside of the range of the particular I/O. + * + * Hence, amortize the cost of the lock against a full file + * flush and reduce the chances of repeated iolock cycles going + * forward. + */ if (inode->i_mapping->nrpages) { - ret = filemap_write_and_wait_range( - VFS_I(ip)->i_mapping, - pos, pos + size - 1); + ret = filemap_write_and_wait(VFS_I(ip)->i_mapping); if (ret) { xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); return ret; @@ -345,9 +354,7 @@ xfs_file_read_iter( * we fail to invalidate a page, but this should never * happen on XFS. Warn if it does fail. */ - ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, - pos >> PAGE_CACHE_SHIFT, - (pos + size - 1) >> PAGE_CACHE_SHIFT); + ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping); WARN_ON_ONCE(ret); ret = 0; } @@ -733,19 +740,19 @@ xfs_file_dio_aio_write( pos = iocb->ki_pos; end = pos + count - 1; + /* + * See xfs_file_read_iter() for why we do a full-file flush here. + */ if (mapping->nrpages) { - ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, - pos, end); + ret = filemap_write_and_wait(VFS_I(ip)->i_mapping); if (ret) goto out; /* - * Invalidate whole pages. This can return an error if - * we fail to invalidate a page, but this should never - * happen on XFS. Warn if it does fail. + * Invalidate whole pages. This can return an error if we fail + * to invalidate a page, but this should never happen on XFS. + * Warn if it does fail. */ - ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, - pos >> PAGE_CACHE_SHIFT, - end >> PAGE_CACHE_SHIFT); + ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping); WARN_ON_ONCE(ret); ret = 0; } @@ -1539,8 +1546,36 @@ xfs_filemap_fault( return ret; } +STATIC int +xfs_filemap_pmd_fault( + struct vm_area_struct *vma, + unsigned long addr, + pmd_t *pmd, + unsigned int flags) +{ + struct inode *inode = file_inode(vma->vm_file); + struct xfs_inode *ip = XFS_I(inode); + int ret; + + if (!IS_DAX(inode)) + return VM_FAULT_FALLBACK; + + trace_xfs_filemap_pmd_fault(ip); + + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); + ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_direct, + xfs_end_io_dax_write); + xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); + sb_end_pagefault(inode->i_sb); + + return ret; +} + static const struct vm_operations_struct xfs_file_vm_ops = { .fault = xfs_filemap_fault, + .pmd_fault = xfs_filemap_pmd_fault, .map_pages = filemap_map_pages, .page_mkwrite = xfs_filemap_page_mkwrite, }; @@ -1553,7 +1588,7 @@ xfs_file_mmap( file_accessed(filp); vma->vm_ops = &xfs_file_vm_ops; if (IS_DAX(file_inode(filp))) - vma->vm_flags |= VM_MIXEDMAP; + vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; return 0; } diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 9b3438a76..ee3aaa0a5 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -250,7 +250,7 @@ xfs_growfs_data_private( agf->agf_freeblks = cpu_to_be32(tmpsize); agf->agf_longest = cpu_to_be32(tmpsize); if (xfs_sb_version_hascrc(&mp->m_sb)) - uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_uuid); + uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid); error = xfs_bwrite(bp); xfs_buf_relse(bp); @@ -273,7 +273,7 @@ xfs_growfs_data_private( if (xfs_sb_version_hascrc(&mp->m_sb)) { agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC); agfl->agfl_seqno = cpu_to_be32(agno); - uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid); + uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid); } agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp); @@ -309,7 +309,7 @@ xfs_growfs_data_private( agi->agi_newino = cpu_to_be32(NULLAGINO); agi->agi_dirino = cpu_to_be32(NULLAGINO); if (xfs_sb_version_hascrc(&mp->m_sb)) - uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid); + uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid); if (xfs_sb_version_hasfinobt(&mp->m_sb)) { agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp)); agi->agi_free_level = cpu_to_be32(1); diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 76a9f2783..0a326bd64 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -412,6 +412,8 @@ xfs_iget( if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) return -EINVAL; + XFS_STATS_INC(xs_ig_attempts); + /* get the perag structure and ensure that it's inode capable */ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); agino = XFS_INO_TO_AGINO(mp, ino); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 3da9f4da4..dc40a6d5a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -164,7 +164,7 @@ xfs_ilock( (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); - ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); + ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); if (lock_flags & XFS_IOLOCK_EXCL) mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); @@ -212,7 +212,7 @@ xfs_ilock_nowait( (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); - ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); + ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); if (lock_flags & XFS_IOLOCK_EXCL) { if (!mrtryupdate(&ip->i_iolock)) @@ -281,7 +281,7 @@ xfs_iunlock( (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); - ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); + ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); ASSERT(lock_flags != 0); if (lock_flags & XFS_IOLOCK_EXCL) @@ -362,32 +362,58 @@ int xfs_lots_retries; int xfs_lock_delays; #endif +/* + * xfs_lockdep_subclass_ok() is only used in an ASSERT, so is only called when + * DEBUG or XFS_WARN is set. And MAX_LOCKDEP_SUBCLASSES is then only defined + * when CONFIG_LOCKDEP is set. Hence the complex define below to avoid build + * errors and warnings. + */ +#if (defined(DEBUG) || defined(XFS_WARN)) && defined(CONFIG_LOCKDEP) +static bool +xfs_lockdep_subclass_ok( + int subclass) +{ + return subclass < MAX_LOCKDEP_SUBCLASSES; +} +#else +#define xfs_lockdep_subclass_ok(subclass) (true) +#endif + /* * Bump the subclass so xfs_lock_inodes() acquires each lock with a different - * value. This shouldn't be called for page fault locking, but we also need to - * ensure we don't overrun the number of lockdep subclasses for the iolock or - * mmaplock as that is limited to 12 by the mmap lock lockdep annotations. + * value. This can be called for any type of inode lock combination, including + * parent locking. Care must be taken to ensure we don't overrun the subclass + * storage fields in the class mask we build. */ static inline int xfs_lock_inumorder(int lock_mode, int subclass) { + int class = 0; + + ASSERT(!(lock_mode & (XFS_ILOCK_PARENT | XFS_ILOCK_RTBITMAP | + XFS_ILOCK_RTSUM))); + ASSERT(xfs_lockdep_subclass_ok(subclass)); + if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { - ASSERT(subclass + XFS_LOCK_INUMORDER < - (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT))); - lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; + ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS); + ASSERT(xfs_lockdep_subclass_ok(subclass + + XFS_IOLOCK_PARENT_VAL)); + class += subclass << XFS_IOLOCK_SHIFT; + if (lock_mode & XFS_IOLOCK_PARENT) + class += XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT; } if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) { - ASSERT(subclass + XFS_LOCK_INUMORDER < - (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT))); - lock_mode |= (subclass + XFS_LOCK_INUMORDER) << - XFS_MMAPLOCK_SHIFT; + ASSERT(subclass <= XFS_MMAPLOCK_MAX_SUBCLASS); + class += subclass << XFS_MMAPLOCK_SHIFT; } - if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) - lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; + if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) { + ASSERT(subclass <= XFS_ILOCK_MAX_SUBCLASS); + class += subclass << XFS_ILOCK_SHIFT; + } - return lock_mode; + return (lock_mode & ~XFS_LOCK_SUBCLASS_MASK) | class; } /* @@ -399,6 +425,11 @@ xfs_lock_inumorder(int lock_mode, int subclass) * transaction (such as truncate). This can result in deadlock since the long * running trans might need to wait for the inode we just locked in order to * push the tail and free space in the log. + * + * xfs_lock_inodes() can only be used to lock one type of lock at a time - + * the iolock, the mmaplock or the ilock, but not more than one at a time. If we + * lock more than one at a time, lockdep will report false positives saying we + * have violated locking orders. */ void xfs_lock_inodes( @@ -409,8 +440,29 @@ xfs_lock_inodes( int attempts = 0, i, j, try_lock; xfs_log_item_t *lp; - /* currently supports between 2 and 5 inodes */ + /* + * Currently supports between 2 and 5 inodes with exclusive locking. We + * support an arbitrary depth of locking here, but absolute limits on + * inodes depend on the the type of locking and the limits placed by + * lockdep annotations in xfs_lock_inumorder. These are all checked by + * the asserts. + */ ASSERT(ips && inodes >= 2 && inodes <= 5); + ASSERT(lock_mode & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL | + XFS_ILOCK_EXCL)); + ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED | + XFS_ILOCK_SHARED))); + ASSERT(!(lock_mode & XFS_IOLOCK_EXCL) || + inodes <= XFS_IOLOCK_MAX_SUBCLASS + 1); + ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) || + inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1); + ASSERT(!(lock_mode & XFS_ILOCK_EXCL) || + inodes <= XFS_ILOCK_MAX_SUBCLASS + 1); + + if (lock_mode & XFS_IOLOCK_EXCL) { + ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL | XFS_ILOCK_EXCL))); + } else if (lock_mode & XFS_MMAPLOCK_EXCL) + ASSERT(!(lock_mode & XFS_ILOCK_EXCL)); try_lock = 0; i = 0; @@ -629,30 +681,29 @@ xfs_lookup( { xfs_ino_t inum; int error; - uint lock_mode; trace_xfs_lookup(dp, name); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; - lock_mode = xfs_ilock_data_map_shared(dp); + xfs_ilock(dp, XFS_IOLOCK_SHARED); error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); - xfs_iunlock(dp, lock_mode); - if (error) - goto out; + goto out_unlock; error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp); if (error) goto out_free_name; + xfs_iunlock(dp, XFS_IOLOCK_SHARED); return 0; out_free_name: if (ci_name) kmem_free(ci_name->name); -out: +out_unlock: + xfs_iunlock(dp, XFS_IOLOCK_SHARED); *ipp = NULL; return error; } @@ -787,7 +838,7 @@ xfs_ialloc( if (ip->i_d.di_version == 3) { ASSERT(ip->i_d.di_ino == ino); - ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid)); + ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_meta_uuid)); ip->i_d.di_crc = 0; ip->i_d.di_changecount = 1; ip->i_d.di_lsn = 0; @@ -1149,7 +1200,8 @@ xfs_create( goto out_trans_cancel; - xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); + xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL | + XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT); unlock_dp_on_error = true; xfs_bmap_init(&free_list, &first_block); @@ -1175,11 +1227,8 @@ xfs_create( */ error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, resblks > 0, &ip, &committed); - if (error) { - if (error == -ENOSPC) - goto out_trans_cancel; + if (error) goto out_trans_cancel; - } /* * Now we join the directory inode to the transaction. We do not do it @@ -1188,7 +1237,7 @@ xfs_create( * the transaction cancel unlocking dp so don't do it explicitly in the * error path. */ - xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); unlock_dp_on_error = false; error = xfs_dir_createname(tp, dp, name, ip->i_ino, @@ -1261,7 +1310,7 @@ xfs_create( xfs_qm_dqrele(pdqp); if (unlock_dp_on_error) - xfs_iunlock(dp, XFS_ILOCK_EXCL); + xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); return error; } @@ -1318,11 +1367,8 @@ xfs_create_tmpfile( error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, resblks > 0, &ip, NULL); - if (error) { - if (error == -ENOSPC) - goto out_trans_cancel; + if (error) goto out_trans_cancel; - } if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); @@ -1409,10 +1455,11 @@ xfs_link( if (error) goto error_return; + xfs_ilock(tdp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, tdp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); /* * If we are using project inheritance, we only allow hard link @@ -1791,14 +1838,15 @@ xfs_inactive_ifree( xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1); /* - * Just ignore errors at this point. There is nothing we can - * do except to try to keep going. Make sure it's not a silent - * error. + * Just ignore errors at this point. There is nothing we can do except + * to try to keep going. Make sure it's not a silent error. */ error = xfs_bmap_finish(&tp, &free_list, &committed); - if (error) + if (error) { xfs_notice(mp, "%s: xfs_bmap_finish returned error %d", __func__, error); + xfs_bmap_cancel(&free_list); + } error = xfs_trans_commit(tp); if (error) xfs_notice(mp, "%s: xfs_trans_commit returned error %d", @@ -2515,9 +2563,10 @@ xfs_remove( goto out_trans_cancel; } + xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); /* @@ -2898,6 +2947,12 @@ xfs_rename( * whether the target directory is the same as the source * directory, we can lock from 2 to 4 inodes. */ + if (!new_parent) + xfs_ilock(src_dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); + else + xfs_lock_two_inodes(src_dp, target_dp, + XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); + xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); /* @@ -2905,9 +2960,9 @@ xfs_rename( * we can rely on either trans_commit or trans_cancel to unlock * them. */ - xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, src_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); if (new_parent) - xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, target_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); if (target_ip) xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 8f22d2036..ca9e11989 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -284,9 +284,9 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) * Flags for lockdep annotations. * * XFS_LOCK_PARENT - for directory operations that require locking a - * parent directory inode and a child entry inode. The parent gets locked - * with this flag so it gets a lockdep subclass of 1 and the child entry - * lock will have a lockdep subclass of 0. + * parent directory inode and a child entry inode. IOLOCK requires nesting, + * MMAPLOCK does not support this class, ILOCK requires a single subclass + * to differentiate parent from child. * * XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary * inodes do not participate in the normal lock order, and thus have their @@ -295,30 +295,63 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) * XFS_LOCK_INUMORDER - for locking several inodes at the some time * with xfs_lock_inodes(). This flag is used as the starting subclass * and each subsequent lock acquired will increment the subclass by one. - * So the first lock acquired will have a lockdep subclass of 4, the - * second lock will have a lockdep subclass of 5, and so on. It is - * the responsibility of the class builder to shift this to the correct - * portion of the lock_mode lockdep mask. + * However, MAX_LOCKDEP_SUBCLASSES == 8, which means we are greatly + * limited to the subclasses we can represent via nesting. We need at least + * 5 inodes nest depth for the ILOCK through rename, and we also have to support + * XFS_ILOCK_PARENT, which gives 6 subclasses. Then we have XFS_ILOCK_RTBITMAP + * and XFS_ILOCK_RTSUM, which are another 2 unique subclasses, so that's all + * 8 subclasses supported by lockdep. + * + * This also means we have to number the sub-classes in the lowest bits of + * the mask we keep, and we have to ensure we never exceed 3 bits of lockdep + * mask and we can't use bit-masking to build the subclasses. What a mess. + * + * Bit layout: + * + * Bit Lock Region + * 16-19 XFS_IOLOCK_SHIFT dependencies + * 20-23 XFS_MMAPLOCK_SHIFT dependencies + * 24-31 XFS_ILOCK_SHIFT dependencies + * + * IOLOCK values + * + * 0-3 subclass value + * 4-7 PARENT subclass values + * + * MMAPLOCK values + * + * 0-3 subclass value + * 4-7 unused + * + * ILOCK values + * 0-4 subclass values + * 5 PARENT subclass (not nestable) + * 6 RTBITMAP subclass (not nestable) + * 7 RTSUM subclass (not nestable) + * */ -#define XFS_LOCK_PARENT 1 -#define XFS_LOCK_RTBITMAP 2 -#define XFS_LOCK_RTSUM 3 -#define XFS_LOCK_INUMORDER 4 - -#define XFS_IOLOCK_SHIFT 16 -#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) - -#define XFS_MMAPLOCK_SHIFT 20 - -#define XFS_ILOCK_SHIFT 24 -#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) -#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT) -#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT) - -#define XFS_IOLOCK_DEP_MASK 0x000f0000 -#define XFS_MMAPLOCK_DEP_MASK 0x00f00000 -#define XFS_ILOCK_DEP_MASK 0xff000000 -#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \ +#define XFS_IOLOCK_SHIFT 16 +#define XFS_IOLOCK_PARENT_VAL 4 +#define XFS_IOLOCK_MAX_SUBCLASS (XFS_IOLOCK_PARENT_VAL - 1) +#define XFS_IOLOCK_DEP_MASK 0x000f0000 +#define XFS_IOLOCK_PARENT (XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT) + +#define XFS_MMAPLOCK_SHIFT 20 +#define XFS_MMAPLOCK_NUMORDER 0 +#define XFS_MMAPLOCK_MAX_SUBCLASS 3 +#define XFS_MMAPLOCK_DEP_MASK 0x00f00000 + +#define XFS_ILOCK_SHIFT 24 +#define XFS_ILOCK_PARENT_VAL 5 +#define XFS_ILOCK_MAX_SUBCLASS (XFS_ILOCK_PARENT_VAL - 1) +#define XFS_ILOCK_RTBITMAP_VAL 6 +#define XFS_ILOCK_RTSUM_VAL 7 +#define XFS_ILOCK_DEP_MASK 0xff000000 +#define XFS_ILOCK_PARENT (XFS_ILOCK_PARENT_VAL << XFS_ILOCK_SHIFT) +#define XFS_ILOCK_RTBITMAP (XFS_ILOCK_RTBITMAP_VAL << XFS_ILOCK_SHIFT) +#define XFS_ILOCK_RTSUM (XFS_ILOCK_RTSUM_VAL << XFS_ILOCK_SHIFT) + +#define XFS_LOCK_SUBCLASS_MASK (XFS_IOLOCK_DEP_MASK | \ XFS_MMAPLOCK_DEP_MASK | \ XFS_ILOCK_DEP_MASK) diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index bf13a5a7e..62bd80f4e 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -703,17 +703,10 @@ xfs_iflush_abort( xfs_inode_log_item_t *iip = ip->i_itemp; if (iip) { - struct xfs_ail *ailp = iip->ili_item.li_ailp; if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { - spin_lock(&ailp->xa_lock); - if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { - /* xfs_trans_ail_delete() drops the AIL lock. */ - xfs_trans_ail_delete(ailp, &iip->ili_item, - stale ? - SHUTDOWN_LOG_IO_ERROR : + xfs_trans_ail_remove(&iip->ili_item, + stale ? SHUTDOWN_LOG_IO_ERROR : SHUTDOWN_CORRUPT_INCORE); - } else - spin_unlock(&ailp->xa_lock); } iip->ili_logged = 0; /* diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 766b23f86..8294132e6 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -609,7 +609,7 @@ xfs_setattr_nonsize( tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); if (error) - goto out_dqrele; + goto out_trans_cancel; xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -640,7 +640,7 @@ xfs_setattr_nonsize( NULL, capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); if (error) /* out of quota */ - goto out_trans_cancel; + goto out_unlock; } } @@ -729,10 +729,10 @@ xfs_setattr_nonsize( return 0; +out_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL); out_trans_cancel: xfs_trans_cancel(tp); - xfs_iunlock(ip, XFS_ILOCK_EXCL); -out_dqrele: xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); return error; diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index f41b0c3fd..930ebd86b 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -473,7 +473,8 @@ xfs_bulkstat( * pending error, then we are done. */ del_cursor: - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, error ? + XFS_BTREE_ERROR : XFS_BTREE_NOERROR); xfs_buf_relse(agbp); if (error) break; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 08d4fe46f..aaadee096 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -668,9 +668,9 @@ xfs_log_mount( ASSERT(0); goto out_free_log; } + xfs_crit(mp, "Log size out of supported range."); xfs_crit(mp, -"Log size out of supported range. Continuing onwards, but if log hangs are\n" -"experienced then please report this message in the bug report."); +"Continuing onwards, but if log hangs are experienced then please report this message in the bug report."); } /* @@ -700,6 +700,7 @@ xfs_log_mount( if (error) { xfs_warn(mp, "log mount/recovery failed: error %d", error); + xlog_recover_cancel(mp->m_log); goto out_destroy_ail; } } @@ -740,18 +741,35 @@ out: * it. */ int -xfs_log_mount_finish(xfs_mount_t *mp) +xfs_log_mount_finish( + struct xfs_mount *mp) { int error = 0; - if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) { - error = xlog_recover_finish(mp->m_log); - if (!error) - xfs_log_work_queue(mp); - } else { + if (mp->m_flags & XFS_MOUNT_NORECOVERY) { ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); + return 0; } + error = xlog_recover_finish(mp->m_log); + if (!error) + xfs_log_work_queue(mp); + + return error; +} + +/* + * The mount has failed. Cancel the recovery if it hasn't completed and destroy + * the log. + */ +int +xfs_log_mount_cancel( + struct xfs_mount *mp) +{ + int error; + + error = xlog_recover_cancel(mp->m_log); + xfs_log_unmount(mp); return error; } @@ -1142,11 +1160,13 @@ xlog_space_left( * In this case we just want to return the size of the * log as the amount of space left. */ + xfs_alert(log->l_mp, "xlog_space_left: head behind tail"); xfs_alert(log->l_mp, - "xlog_space_left: head behind tail\n" - " tail_cycle = %d, tail_bytes = %d\n" - " GH cycle = %d, GH bytes = %d", - tail_cycle, tail_bytes, head_cycle, head_bytes); + " tail_cycle = %d, tail_bytes = %d", + tail_cycle, tail_bytes); + xfs_alert(log->l_mp, + " GH cycle = %d, GH bytes = %d", + head_cycle, head_bytes); ASSERT(0); free_bytes = log->l_logsize; } @@ -1652,8 +1672,13 @@ xlog_cksum( if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { union xlog_in_core2 *xhdr = (union xlog_in_core2 *)rhead; int i; + int xheads; + + xheads = size / XLOG_HEADER_CYCLE_SIZE; + if (size % XLOG_HEADER_CYCLE_SIZE) + xheads++; - for (i = 1; i < log->l_iclog_heads; i++) { + for (i = 1; i < xheads; i++) { crc = crc32c(crc, &xhdr[i].hic_xheader, sizeof(struct xlog_rec_ext_header)); } @@ -2028,26 +2053,24 @@ xlog_print_tic_res( "SWAPEXT" }; - xfs_warn(mp, - "xlog_write: reservation summary:\n" - " trans type = %s (%u)\n" - " unit res = %d bytes\n" - " current res = %d bytes\n" - " total reg = %u bytes (o/flow = %u bytes)\n" - " ophdrs = %u (ophdr space = %u bytes)\n" - " ophdr + reg = %u bytes\n" - " num regions = %u", - ((ticket->t_trans_type <= 0 || - ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? + xfs_warn(mp, "xlog_write: reservation summary:"); + xfs_warn(mp, " trans type = %s (%u)", + ((ticket->t_trans_type <= 0 || + ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]), - ticket->t_trans_type, - ticket->t_unit_res, - ticket->t_curr_res, - ticket->t_res_arr_sum, ticket->t_res_o_flow, - ticket->t_res_num_ophdrs, ophdr_spc, - ticket->t_res_arr_sum + - ticket->t_res_o_flow + ophdr_spc, - ticket->t_res_num); + ticket->t_trans_type); + xfs_warn(mp, " unit res = %d bytes", + ticket->t_unit_res); + xfs_warn(mp, " current res = %d bytes", + ticket->t_curr_res); + xfs_warn(mp, " total reg = %u bytes (o/flow = %u bytes)", + ticket->t_res_arr_sum, ticket->t_res_o_flow); + xfs_warn(mp, " ophdrs = %u (ophdr space = %u bytes)", + ticket->t_res_num_ophdrs, ophdr_spc); + xfs_warn(mp, " ophdr + reg = %u bytes", + ticket->t_res_arr_sum + ticket->t_res_o_flow + ophdr_spc); + xfs_warn(mp, " num regions = %u", + ticket->t_res_num); for (i = 0; i < ticket->t_res_num; i++) { uint r_type = ticket->t_res_arr[i].r_type; diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index fa27aaec7..09d91d316 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -147,6 +147,7 @@ int xfs_log_mount(struct xfs_mount *mp, xfs_daddr_t start_block, int num_bblocks); int xfs_log_mount_finish(struct xfs_mount *mp); +int xfs_log_mount_cancel(struct xfs_mount *); xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp); void xfs_log_space_wake(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index abc2ccbff..4e7649351 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -307,7 +307,13 @@ xlog_cil_insert_items( if (!(lidp->lid_flags & XFS_LID_DIRTY)) continue; - list_move_tail(&lip->li_cil, &cil->xc_cil); + /* + * Only move the item if it isn't already at the tail. This is + * to prevent a transient list_empty() state when reinserting + * an item that is already the only item in the CIL. + */ + if (!list_is_last(&lip->li_cil, &cil->xc_cil)) + list_move_tail(&lip->li_cil, &cil->xc_cil); } /* account for space used by new iovec headers */ diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 1c87c8abf..950f3f947 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -426,6 +426,8 @@ xlog_recover( extern int xlog_recover_finish( struct xlog *log); +extern int +xlog_recover_cancel(struct xlog *); extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead, char *dp, int size); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 480ebba84..512a0945d 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1895,15 +1895,25 @@ xlog_recover_get_buf_lsn( */ goto recover_immediately; case XFS_SB_MAGIC: + /* + * superblock uuids are magic. We may or may not have a + * sb_meta_uuid on disk, but it will be set in the in-core + * superblock. We set the uuid pointer for verification + * according to the superblock feature mask to ensure we check + * the relevant UUID in the superblock. + */ lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn); - uuid = &((struct xfs_dsb *)blk)->sb_uuid; + if (xfs_sb_version_hasmetauuid(&mp->m_sb)) + uuid = &((struct xfs_dsb *)blk)->sb_meta_uuid; + else + uuid = &((struct xfs_dsb *)blk)->sb_uuid; break; default: break; } if (lsn != (xfs_lsn_t)-1) { - if (!uuid_equal(&mp->m_sb.sb_uuid, uuid)) + if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid)) goto recover_immediately; return lsn; } @@ -2933,16 +2943,16 @@ xlog_recover_efi_pass2( struct xlog_recover_item *item, xfs_lsn_t lsn) { - int error; - xfs_mount_t *mp = log->l_mp; - xfs_efi_log_item_t *efip; - xfs_efi_log_format_t *efi_formatp; + int error; + struct xfs_mount *mp = log->l_mp; + struct xfs_efi_log_item *efip; + struct xfs_efi_log_format *efi_formatp; efi_formatp = item->ri_buf[0].i_addr; efip = xfs_efi_init(mp, efi_formatp->efi_nextents); - if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), - &(efip->efi_format)))) { + error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format); + if (error) { xfs_efi_item_free(efip); return error; } @@ -2950,20 +2960,23 @@ xlog_recover_efi_pass2( spin_lock(&log->l_ailp->xa_lock); /* - * xfs_trans_ail_update() drops the AIL lock. + * The EFI has two references. One for the EFD and one for EFI to ensure + * it makes it into the AIL. Insert the EFI into the AIL directly and + * drop the EFI reference. Note that xfs_trans_ail_update() drops the + * AIL lock. */ xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn); + xfs_efi_release(efip); return 0; } /* - * This routine is called when an efd format structure is found in - * a committed transaction in the log. It's purpose is to cancel - * the corresponding efi if it was still in the log. To do this - * it searches the AIL for the efi with an id equal to that in the - * efd format structure. If we find it, we remove the efi from the - * AIL and free it. + * This routine is called when an EFD format structure is found in a committed + * transaction in the log. Its purpose is to cancel the corresponding EFI if it + * was still in the log. To do this it searches the AIL for the EFI with an id + * equal to that in the EFD format structure. If we find it we drop the EFD + * reference, which removes the EFI from the AIL and frees it. */ STATIC int xlog_recover_efd_pass2( @@ -2985,8 +2998,8 @@ xlog_recover_efd_pass2( efi_id = efd_formatp->efd_efi_id; /* - * Search for the efi with the id in the efd format structure - * in the AIL. + * Search for the EFI with the id in the EFD format structure in the + * AIL. */ spin_lock(&ailp->xa_lock); lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); @@ -2995,18 +3008,18 @@ xlog_recover_efd_pass2( efip = (xfs_efi_log_item_t *)lip; if (efip->efi_format.efi_id == efi_id) { /* - * xfs_trans_ail_delete() drops the - * AIL lock. + * Drop the EFD reference to the EFI. This + * removes the EFI from the AIL and frees it. */ - xfs_trans_ail_delete(ailp, lip, - SHUTDOWN_CORRUPT_INCORE); - xfs_efi_item_free(efip); + spin_unlock(&ailp->xa_lock); + xfs_efi_release(efip); spin_lock(&ailp->xa_lock); break; } } lip = xfs_trans_ail_cursor_next(ailp, &cur); } + xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->xa_lock); @@ -3034,6 +3047,11 @@ xlog_recover_do_icreate_pass2( unsigned int count; unsigned int isize; xfs_agblock_t length; + int blks_per_cluster; + int bb_per_cluster; + int cancel_count; + int nbufs; + int i; icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr; if (icl->icl_type != XFS_LI_ICREATE) { @@ -3092,22 +3110,45 @@ xlog_recover_do_icreate_pass2( } /* - * Inode buffers can be freed. Do not replay the inode initialisation as - * we could be overwriting something written after this inode buffer was - * cancelled. + * The icreate transaction can cover multiple cluster buffers and these + * buffers could have been freed and reused. Check the individual + * buffers for cancellation so we don't overwrite anything written after + * a cancellation. + */ + blks_per_cluster = xfs_icluster_size_fsb(mp); + bb_per_cluster = XFS_FSB_TO_BB(mp, blks_per_cluster); + nbufs = length / blks_per_cluster; + for (i = 0, cancel_count = 0; i < nbufs; i++) { + xfs_daddr_t daddr; + + daddr = XFS_AGB_TO_DADDR(mp, agno, + agbno + i * blks_per_cluster); + if (xlog_check_buffer_cancelled(log, daddr, bb_per_cluster, 0)) + cancel_count++; + } + + /* + * We currently only use icreate for a single allocation at a time. This + * means we should expect either all or none of the buffers to be + * cancelled. Be conservative and skip replay if at least one buffer is + * cancelled, but warn the user that something is awry if the buffers + * are not consistent. * - * XXX: we need to iterate all buffers and only init those that are not - * cancelled. I think that a more fine grained factoring of - * xfs_ialloc_inode_init may be appropriate here to enable this to be - * done easily. + * XXX: This must be refined to only skip cancelled clusters once we use + * icreate for multiple chunk allocations. */ - if (xlog_check_buffer_cancelled(log, - XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0)) + ASSERT(!cancel_count || cancel_count == nbufs); + if (cancel_count) { + if (cancel_count != nbufs) + xfs_warn(mp, + "WARNING: partial inode chunk cancellation, skipped icreate."); + trace_xfs_log_recover_icreate_cancel(log, icl); return 0; + } - xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno, length, - be32_to_cpu(icl->icl_gen)); - return 0; + trace_xfs_log_recover_icreate_recover(log, icl); + return xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno, + length, be32_to_cpu(icl->icl_gen)); } STATIC void @@ -3385,14 +3426,24 @@ xlog_recover_add_to_cont_trans( char *ptr, *old_ptr; int old_len; + /* + * If the transaction is empty, the header was split across this and the + * previous record. Copy the rest of the header. + */ if (list_empty(&trans->r_itemq)) { - /* finish copying rest of trans header */ + ASSERT(len < sizeof(struct xfs_trans_header)); + if (len > sizeof(struct xfs_trans_header)) { + xfs_warn(log->l_mp, "%s: bad header length", __func__); + return -EIO; + } + xlog_recover_add_item(&trans->r_itemq); ptr = (char *)&trans->r_theader + - sizeof(xfs_trans_header_t) - len; + sizeof(struct xfs_trans_header) - len; memcpy(ptr, dp, len); return 0; } + /* take the tail entry */ item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); @@ -3441,7 +3492,19 @@ xlog_recover_add_to_trans( ASSERT(0); return -EIO; } - if (len == sizeof(xfs_trans_header_t)) + + if (len > sizeof(struct xfs_trans_header)) { + xfs_warn(log->l_mp, "%s: bad header length", __func__); + ASSERT(0); + return -EIO; + } + + /* + * The transaction header can be arbitrarily split across op + * records. If we don't have the whole thing here, copy what we + * do have and handle the rest in the next record. + */ + if (len == sizeof(struct xfs_trans_header)) xlog_recover_add_item(&trans->r_itemq); memcpy(&trans->r_theader, dp, len); return 0; @@ -3744,7 +3807,7 @@ xlog_recover_process_efi( * free the memory associated with it. */ set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); - xfs_efi_release(efip, efip->efi_format.efi_nextents); + xfs_efi_release(efip); return -EIO; } } @@ -3757,11 +3820,11 @@ xlog_recover_process_efi( for (i = 0; i < efip->efi_format.efi_nextents; i++) { extp = &(efip->efi_format.efi_extents[i]); - error = xfs_free_extent(tp, extp->ext_start, extp->ext_len); + error = xfs_trans_free_extent(tp, efdp, extp->ext_start, + extp->ext_len); if (error) goto abort_error; - xfs_trans_log_efd_extent(tp, efdp, extp->ext_start, - extp->ext_len); + } set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); @@ -3793,10 +3856,10 @@ abort_error: */ STATIC int xlog_recover_process_efis( - struct xlog *log) + struct xlog *log) { - xfs_log_item_t *lip; - xfs_efi_log_item_t *efip; + struct xfs_log_item *lip; + struct xfs_efi_log_item *efip; int error = 0; struct xfs_ail_cursor cur; struct xfs_ail *ailp; @@ -3820,7 +3883,7 @@ xlog_recover_process_efis( /* * Skip EFIs that we've already processed. */ - efip = (xfs_efi_log_item_t *)lip; + efip = container_of(lip, struct xfs_efi_log_item, efi_item); if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) { lip = xfs_trans_ail_cursor_next(ailp, &cur); continue; @@ -3839,6 +3902,50 @@ out: return error; } +/* + * A cancel occurs when the mount has failed and we're bailing out. Release all + * pending EFIs so they don't pin the AIL. + */ +STATIC int +xlog_recover_cancel_efis( + struct xlog *log) +{ + struct xfs_log_item *lip; + struct xfs_efi_log_item *efip; + int error = 0; + struct xfs_ail_cursor cur; + struct xfs_ail *ailp; + + ailp = log->l_ailp; + spin_lock(&ailp->xa_lock); + lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); + while (lip != NULL) { + /* + * We're done when we see something other than an EFI. + * There should be no EFIs left in the AIL now. + */ + if (lip->li_type != XFS_LI_EFI) { +#ifdef DEBUG + for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur)) + ASSERT(lip->li_type != XFS_LI_EFI); +#endif + break; + } + + efip = container_of(lip, struct xfs_efi_log_item, efi_item); + + spin_unlock(&ailp->xa_lock); + xfs_efi_release(efip); + spin_lock(&ailp->xa_lock); + + lip = xfs_trans_ail_cursor_next(ailp, &cur); + } + + xfs_trans_ail_cursor_done(&cur); + spin_unlock(&ailp->xa_lock); + return error; +} + /* * This routine performs a transaction to null out a bad inode pointer * in an agi unlinked inode hash bucket. @@ -4532,11 +4639,13 @@ xlog_recover( xfs_sb_has_incompat_log_feature(&log->l_mp->m_sb, XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)) { xfs_warn(log->l_mp, -"Superblock has unknown incompatible log features (0x%x) enabled.\n" -"The log can not be fully and/or safely recovered by this kernel.\n" -"Please recover the log on a kernel that supports the unknown features.", +"Superblock has unknown incompatible log features (0x%x) enabled.", (log->l_mp->m_sb.sb_features_log_incompat & XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)); + xfs_warn(log->l_mp, +"The log can not be fully and/or safely recovered by this kernel."); + xfs_warn(log->l_mp, +"Please recover the log on a kernel that supports the unknown features."); return -EINVAL; } @@ -4612,6 +4721,17 @@ xlog_recover_finish( return 0; } +int +xlog_recover_cancel( + struct xlog *log) +{ + int error = 0; + + if (log->l_flags & XLOG_RECOVERY_NEEDED) + error = xlog_recover_cancel_efis(log); + + return error; +} #if defined(DEBUG) /* diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 461e791ef..bf92e0c03 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -615,14 +615,14 @@ xfs_default_resblks(xfs_mount_t *mp) */ int xfs_mountfs( - xfs_mount_t *mp) + struct xfs_mount *mp) { - xfs_sb_t *sbp = &(mp->m_sb); - xfs_inode_t *rip; - __uint64_t resblks; - uint quotamount = 0; - uint quotaflags = 0; - int error = 0; + struct xfs_sb *sbp = &(mp->m_sb); + struct xfs_inode *rip; + __uint64_t resblks; + uint quotamount = 0; + uint quotaflags = 0; + int error = 0; xfs_sb_mount_common(mp, sbp); @@ -799,7 +799,9 @@ xfs_mountfs( } /* - * log's mount-time initialization. Perform 1st part recovery if needed + * Log's mount-time initialization. The first part of recovery can place + * some items on the AIL, to be handled when recovery is finished or + * cancelled. */ error = xfs_log_mount(mp, mp->m_logdev_targp, XFS_FSB_TO_DADDR(mp, sbp->sb_logstart), @@ -910,9 +912,9 @@ xfs_mountfs( } /* - * Finish recovering the file system. This part needed to be - * delayed until after the root and real-time bitmap inodes - * were consistently read in. + * Finish recovering the file system. This part needed to be delayed + * until after the root and real-time bitmap inodes were consistently + * read in. */ error = xfs_log_mount_finish(mp); if (error) { @@ -955,8 +957,10 @@ xfs_mountfs( xfs_rtunmount_inodes(mp); out_rele_rip: IRELE(rip); + cancel_delayed_work_sync(&mp->m_reclaim_work); + xfs_reclaim_inodes(mp, SYNC_WAIT); out_log_dealloc: - xfs_log_unmount(mp); + xfs_log_mount_cancel(mp); out_fail_wait: if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) xfs_wait_buftarg(mp->m_logdev_targp); diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index f4e8c06ee..ab1bac6a3 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -757,31 +757,30 @@ xfs_rtallocate_extent_size( /* * Allocate space to the bitmap or summary file, and zero it, for growfs. */ -STATIC int /* error */ +STATIC int xfs_growfs_rt_alloc( - xfs_mount_t *mp, /* file system mount point */ - xfs_extlen_t oblocks, /* old count of blocks */ - xfs_extlen_t nblocks, /* new count of blocks */ - xfs_inode_t *ip) /* inode (bitmap/summary) */ + struct xfs_mount *mp, /* file system mount point */ + xfs_extlen_t oblocks, /* old count of blocks */ + xfs_extlen_t nblocks, /* new count of blocks */ + struct xfs_inode *ip) /* inode (bitmap/summary) */ { - xfs_fileoff_t bno; /* block number in file */ - xfs_buf_t *bp; /* temporary buffer for zeroing */ - int committed; /* transaction committed flag */ - xfs_daddr_t d; /* disk block address */ - int error; /* error return value */ - xfs_fsblock_t firstblock; /* first block allocated in xaction */ - xfs_bmap_free_t flist; /* list of freed blocks */ - xfs_fsblock_t fsbno; /* filesystem block for bno */ - xfs_bmbt_irec_t map; /* block map output */ - int nmap; /* number of block maps */ - int resblks; /* space reservation */ + xfs_fileoff_t bno; /* block number in file */ + struct xfs_buf *bp; /* temporary buffer for zeroing */ + int committed; /* transaction committed flag */ + xfs_daddr_t d; /* disk block address */ + int error; /* error return value */ + xfs_fsblock_t firstblock;/* first block allocated in xaction */ + struct xfs_bmap_free flist; /* list of freed blocks */ + xfs_fsblock_t fsbno; /* filesystem block for bno */ + struct xfs_bmbt_irec map; /* block map output */ + int nmap; /* number of block maps */ + int resblks; /* space reservation */ + struct xfs_trans *tp; /* * Allocate space to the file, as necessary. */ while (oblocks < nblocks) { - xfs_trans_t *tp; - tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC); resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks); /* @@ -790,7 +789,7 @@ xfs_growfs_rt_alloc( error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtalloc, resblks, 0); if (error) - goto error_cancel; + goto out_trans_cancel; /* * Lock the inode. */ @@ -808,16 +807,16 @@ xfs_growfs_rt_alloc( if (!error && nmap < 1) error = -ENOSPC; if (error) - goto error_cancel; + goto out_bmap_cancel; /* * Free any blocks freed up in the transaction, then commit. */ error = xfs_bmap_finish(&tp, &flist, &committed); if (error) - goto error_cancel; + goto out_bmap_cancel; error = xfs_trans_commit(tp); if (error) - goto error; + return error; /* * Now we need to clear the allocated blocks. * Do this one block per transaction, to keep it simple. @@ -832,7 +831,7 @@ xfs_growfs_rt_alloc( error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtzero, 0, 0); if (error) - goto error_cancel; + goto out_trans_cancel; /* * Lock the bitmap inode. */ @@ -846,9 +845,7 @@ xfs_growfs_rt_alloc( mp->m_bsize, 0); if (bp == NULL) { error = -EIO; -error_cancel: - xfs_trans_cancel(tp); - goto error; + goto out_trans_cancel; } memset(bp->b_addr, 0, mp->m_sb.sb_blocksize); xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); @@ -857,16 +854,20 @@ error_cancel: */ error = xfs_trans_commit(tp); if (error) - goto error; + return error; } /* * Go on to the next extent, if any. */ oblocks = map.br_startoff + map.br_blockcount; } + return 0; -error: +out_bmap_cancel: + xfs_bmap_cancel(&flist); +out_trans_cancel: + xfs_trans_cancel(tp); return error; } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index bbd9b1f10..904f637cf 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -261,16 +261,8 @@ xfs_parseargs( mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL); if (!mp->m_rtname) return -ENOMEM; - } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { - if (!value || !*value) { - xfs_warn(mp, "%s option requires an argument", - this_char); - return -EINVAL; - } - if (kstrtoint(value, 10, &iosize)) - return -EINVAL; - iosizelog = ffs(iosize) - 1; - } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { + } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE) || + !strcmp(this_char, MNTOPT_BIOSIZE)) { if (!value || !*value) { xfs_warn(mp, "%s option requires an argument", this_char); @@ -1528,6 +1520,10 @@ xfs_fs_fill_super( } } + if (xfs_sb_version_hassparseinodes(&mp->m_sb)) + xfs_alert(mp, + "EXPERIMENTAL sparse inode feature enabled. Use at your own risk!"); + error = xfs_mountfs(mp); if (error) goto out_filestream_unmount; diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 4be27b021..996481eeb 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -240,7 +240,8 @@ xfs_symlink( if (error) goto out_trans_cancel; - xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); + xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL | + XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT); unlock_dp_on_error = true; /* @@ -288,7 +289,7 @@ xfs_symlink( * the transaction cancel unlocking dp so don't do it explicitly in the * error path. */ - xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); unlock_dp_on_error = false; /* @@ -421,7 +422,7 @@ out_release_inode: xfs_qm_dqrele(pdqp); if (unlock_dp_on_error) - xfs_iunlock(dp, XFS_ILOCK_EXCL); + xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); return error; } @@ -501,7 +502,7 @@ xfs_inactive_symlink_rmt( /* * Unmap the dead block(s) to the free_list. */ - error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, + error = xfs_bunmapi(tp, ip, 0, size, 0, nmaps, &first_block, &free_list, &done); if (error) goto error_bmap_cancel; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 8d916d33d..5ed36b1e0 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -687,6 +687,7 @@ DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag); DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid); DEFINE_INODE_EVENT(xfs_filemap_fault); +DEFINE_INODE_EVENT(xfs_filemap_pmd_fault); DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite); DECLARE_EVENT_CLASS(xfs_iref_class, @@ -2089,6 +2090,40 @@ DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover); DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel); DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip); +DECLARE_EVENT_CLASS(xfs_log_recover_icreate_item_class, + TP_PROTO(struct xlog *log, struct xfs_icreate_log *in_f), + TP_ARGS(log, in_f), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(unsigned int, count) + __field(unsigned int, isize) + __field(xfs_agblock_t, length) + __field(unsigned int, gen) + ), + TP_fast_assign( + __entry->dev = log->l_mp->m_super->s_dev; + __entry->agno = be32_to_cpu(in_f->icl_ag); + __entry->agbno = be32_to_cpu(in_f->icl_agbno); + __entry->count = be32_to_cpu(in_f->icl_count); + __entry->isize = be32_to_cpu(in_f->icl_isize); + __entry->length = be32_to_cpu(in_f->icl_length); + __entry->gen = be32_to_cpu(in_f->icl_gen); + ), + TP_printk("dev %d:%d agno %u agbno %u count %u isize %u length %u " + "gen %u", MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, __entry->agbno, __entry->count, __entry->isize, + __entry->length, __entry->gen) +) +#define DEFINE_LOG_RECOVER_ICREATE_ITEM(name) \ +DEFINE_EVENT(xfs_log_recover_icreate_item_class, name, \ + TP_PROTO(struct xlog *log, struct xfs_icreate_log *in_f), \ + TP_ARGS(log, in_f)) + +DEFINE_LOG_RECOVER_ICREATE_ITEM(xfs_log_recover_icreate_cancel); +DEFINE_LOG_RECOVER_ICREATE_ITEM(xfs_log_recover_icreate_recover); + DECLARE_EVENT_CLASS(xfs_discard_class, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t len), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 0582a2710..a0ab1dae9 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1019,9 +1019,10 @@ xfs_trans_cancel( * chunk we've been working on and get a new transaction to continue. */ int -xfs_trans_roll( +__xfs_trans_roll( struct xfs_trans **tpp, - struct xfs_inode *dp) + struct xfs_inode *dp, + int *committed) { struct xfs_trans *trans; struct xfs_trans_res tres; @@ -1052,6 +1053,7 @@ xfs_trans_roll( if (error) return error; + *committed = 1; trans = *tpp; /* @@ -1074,3 +1076,12 @@ xfs_trans_roll( xfs_trans_ijoin(trans, dp, 0); return 0; } + +int +xfs_trans_roll( + struct xfs_trans **tpp, + struct xfs_inode *dp) +{ + int committed = 0; + return __xfs_trans_roll(tpp, dp, &committed); +} diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 3b21b4e5e..4643070d7 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -213,7 +213,6 @@ void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint); void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint); void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint); struct xfs_efi_log_item *xfs_trans_get_efi(xfs_trans_t *, uint); -void xfs_efi_release(struct xfs_efi_log_item *, uint); void xfs_trans_log_efi_extent(xfs_trans_t *, struct xfs_efi_log_item *, xfs_fsblock_t, @@ -221,11 +220,11 @@ void xfs_trans_log_efi_extent(xfs_trans_t *, struct xfs_efd_log_item *xfs_trans_get_efd(xfs_trans_t *, struct xfs_efi_log_item *, uint); -void xfs_trans_log_efd_extent(xfs_trans_t *, - struct xfs_efd_log_item *, - xfs_fsblock_t, - xfs_extlen_t); +int xfs_trans_free_extent(struct xfs_trans *, + struct xfs_efd_log_item *, xfs_fsblock_t, + xfs_extlen_t); int xfs_trans_commit(struct xfs_trans *); +int __xfs_trans_roll(struct xfs_trans **, struct xfs_inode *, int *); int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); void xfs_trans_cancel(xfs_trans_t *); int xfs_trans_ail_init(struct xfs_mount *); diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c index 284397dd7..a96ae540e 100644 --- a/fs/xfs/xfs_trans_extfree.c +++ b/fs/xfs/xfs_trans_extfree.c @@ -25,6 +25,7 @@ #include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_extfree_item.h" +#include "xfs_alloc.h" /* * This routine is called to allocate an "extent free intention" @@ -108,19 +109,30 @@ xfs_trans_get_efd(xfs_trans_t *tp, } /* - * This routine is called to indicate that the described - * extent is to be logged as having been freed. It should - * be called once for each extent freed. + * Free an extent and log it to the EFD. Note that the transaction is marked + * dirty regardless of whether the extent free succeeds or fails to support the + * EFI/EFD lifecycle rules. */ -void -xfs_trans_log_efd_extent(xfs_trans_t *tp, - xfs_efd_log_item_t *efdp, - xfs_fsblock_t start_block, - xfs_extlen_t ext_len) +int +xfs_trans_free_extent( + struct xfs_trans *tp, + struct xfs_efd_log_item *efdp, + xfs_fsblock_t start_block, + xfs_extlen_t ext_len) { uint next_extent; - xfs_extent_t *extp; + struct xfs_extent *extp; + int error; + error = xfs_free_extent(tp, start_block, ext_len); + + /* + * Mark the transaction dirty, even on error. This ensures the + * transaction is aborted, which: + * + * 1.) releases the EFI and frees the EFD + * 2.) shuts down the filesystem + */ tp->t_flags |= XFS_TRANS_DIRTY; efdp->efd_item.li_desc->lid_flags |= XFS_LID_DIRTY; @@ -130,4 +142,6 @@ xfs_trans_log_efd_extent(xfs_trans_t *tp, extp->ext_start = start_block; extp->ext_len = ext_len; efdp->efd_next_extent++; + + return error; } diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 1b7362945..49931b72d 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -119,6 +119,21 @@ xfs_trans_ail_delete( xfs_trans_ail_delete_bulk(ailp, &lip, 1, shutdown_type); } +static inline void +xfs_trans_ail_remove( + struct xfs_log_item *lip, + int shutdown_type) +{ + struct xfs_ail *ailp = lip->li_ailp; + + spin_lock(&ailp->xa_lock); + /* xfs_trans_ail_delete() drops the AIL lock */ + if (lip->li_flags & XFS_LI_IN_AIL) + xfs_trans_ail_delete(ailp, lip, shutdown_type); + else + spin_unlock(&ailp->xa_lock); +} + void xfs_ail_push(struct xfs_ail *, xfs_lsn_t); void xfs_ail_push_all(struct xfs_ail *); void xfs_ail_push_all_sync(struct xfs_ail *); -- cgit v1.2.3-54-g00ecf