summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/acl.c24
-rw-r--r--fs/9p/cache.c8
-rw-r--r--fs/9p/v9fs.c2
-rw-r--r--fs/9p/v9fs.h2
-rw-r--r--fs/9p/vfs_file.c8
-rw-r--r--fs/9p/vfs_inode.c26
-rw-r--r--fs/9p/vfs_inode_dotl.c21
-rw-r--r--fs/9p/xattr.c4
-rw-r--r--fs/Kconfig14
-rw-r--r--fs/Makefile1
-rw-r--r--fs/adfs/adfs.h32
-rw-r--r--fs/adfs/dir.c6
-rw-r--r--fs/adfs/dir_f.c2
-rw-r--r--fs/adfs/dir_fplus.c2
-rw-r--r--fs/adfs/super.c2
-rw-r--r--fs/affs/affs.h2
-rw-r--r--fs/affs/amigaffs.c13
-rw-r--r--fs/affs/file.c13
-rw-r--r--fs/affs/inode.c1
-rw-r--r--fs/affs/namei.c1
-rw-r--r--fs/affs/super.c4
-rw-r--r--fs/affs/symlink.c9
-rw-r--r--fs/afs/flock.c4
-rw-r--r--fs/afs/inode.c1
-rw-r--r--fs/afs/proc.c25
-rw-r--r--fs/afs/super.c2
-rw-r--r--fs/afs/write.c4
-rw-r--r--fs/attr.c2
-rw-r--r--fs/aufs/Kconfig185
-rw-r--r--fs/aufs/Makefile36
-rw-r--r--fs/aufs/aufs.h46
-rw-r--r--fs/aufs/branch.c1394
-rw-r--r--fs/aufs/branch.h266
-rw-r--r--fs/aufs/cpup.c1366
-rw-r--r--fs/aufs/cpup.h81
-rw-r--r--fs/aufs/dbgaufs.c419
-rw-r--r--fs/aufs/dbgaufs.h35
-rw-r--r--fs/aufs/dcsub.c211
-rw-r--r--fs/aufs/dcsub.h123
-rw-r--r--fs/aufs/debug.c425
-rw-r--r--fs/aufs/debug.h212
-rw-r--r--fs/aufs/dentry.c1123
-rw-r--r--fs/aufs/dentry.h221
-rw-r--r--fs/aufs/dinfo.c537
-rw-r--r--fs/aufs/dir.c745
-rw-r--r--fs/aufs/dir.h118
-rw-r--r--fs/aufs/dynop.c356
-rw-r--r--fs/aufs/dynop.h61
-rw-r--r--fs/aufs/export.c819
-rw-r--r--fs/aufs/f_op.c759
-rw-r--r--fs/aufs/fhsm.c412
-rw-r--r--fs/aufs/file.c831
-rw-r--r--fs/aufs/file.h278
-rw-r--r--fs/aufs/finfo.c143
-rw-r--r--fs/aufs/fstype.h387
-rw-r--r--fs/aufs/hfsnotify.c275
-rw-r--r--fs/aufs/hfsplus.c43
-rw-r--r--fs/aufs/hnotify.c697
-rw-r--r--fs/aufs/i_op.c1477
-rw-r--r--fs/aufs/i_op_add.c919
-rw-r--r--fs/aufs/i_op_del.c497
-rw-r--r--fs/aufs/i_op_ren.c1002
-rw-r--r--fs/aufs/iinfo.c264
-rw-r--r--fs/aufs/inode.c514
-rw-r--r--fs/aufs/inode.h672
-rw-r--r--fs/aufs/ioctl.c206
-rw-r--r--fs/aufs/loop.c133
-rw-r--r--fs/aufs/loop.h39
-rw-r--r--fs/aufs/magic.mk30
-rw-r--r--fs/aufs/module.c207
-rw-r--r--fs/aufs/module.h92
-rw-r--r--fs/aufs/mvdown.c690
-rw-r--r--fs/aufs/opts.c1846
-rw-r--r--fs/aufs/opts.h198
-rw-r--r--fs/aufs/plink.c515
-rw-r--r--fs/aufs/poll.c39
-rw-r--r--fs/aufs/posix_acl.c85
-rw-r--r--fs/aufs/procfs.c156
-rw-r--r--fs/aufs/rdu.c375
-rw-r--r--fs/aufs/rwsem.h178
-rw-r--r--fs/aufs/sbinfo.c353
-rw-r--r--fs/aufs/spl.h98
-rw-r--r--fs/aufs/super.c1026
-rw-r--r--fs/aufs/super.h628
-rw-r--r--fs/aufs/sysaufs.c91
-rw-r--r--fs/aufs/sysaufs.h88
-rw-r--r--fs/aufs/sysfs.c340
-rw-r--r--fs/aufs/sysrq.c144
-rw-r--r--fs/aufs/vdir.c875
-rw-r--r--fs/aufs/vfsub.c853
-rw-r--r--fs/aufs/vfsub.h295
-rw-r--r--fs/aufs/wbr_policy.c752
-rw-r--r--fs/aufs/whout.c1047
-rw-r--r--fs/aufs/whout.h72
-rw-r--r--fs/aufs/wkq.c200
-rw-r--r--fs/aufs/wkq.h78
-rw-r--r--fs/aufs/xattr.c331
-rw-r--r--fs/aufs/xino.c1305
-rw-r--r--fs/autofs4/symlink.c14
-rw-r--r--fs/bad_inode.c2
-rw-r--r--fs/befs/linuxvfs.c42
-rw-r--r--fs/bfs/inode.c2
-rw-r--r--fs/binfmt_elf.c8
-rw-r--r--fs/binfmt_misc.c12
-rw-r--r--fs/block_dev.c168
-rw-r--r--fs/btrfs/Makefile5
-rw-r--r--fs/btrfs/acl.c12
-rw-r--r--fs/btrfs/async-thread.c4
-rw-r--r--fs/btrfs/backref.c25
-rw-r--r--fs/btrfs/btrfs_inode.h4
-rw-r--r--fs/btrfs/check-integrity.c105
-rw-r--r--fs/btrfs/compression.c6
-rw-r--r--fs/btrfs/ctree.c11
-rw-r--r--fs/btrfs/ctree.h187
-rw-r--r--fs/btrfs/delayed-inode.c7
-rw-r--r--fs/btrfs/delayed-ref.c4
-rw-r--r--fs/btrfs/delayed-ref.h8
-rw-r--r--fs/btrfs/dev-replace.c2
-rw-r--r--fs/btrfs/disk-io.c148
-rw-r--r--fs/btrfs/disk-io.h5
-rw-r--r--fs/btrfs/extent-tree.c224
-rw-r--r--fs/btrfs/extent-tree.h0
-rw-r--r--fs/btrfs/extent_io.c355
-rw-r--r--fs/btrfs/extent_io.h139
-rw-r--r--fs/btrfs/extent_map.c2
-rw-r--r--fs/btrfs/extent_map.h10
-rw-r--r--fs/btrfs/file-item.c4
-rw-r--r--fs/btrfs/file.c59
-rw-r--r--fs/btrfs/free-space-cache.c28
-rw-r--r--fs/btrfs/free-space-cache.h2
-rw-r--r--fs/btrfs/free-space-tree.c1605
-rw-r--r--fs/btrfs/free-space-tree.h72
-rw-r--r--fs/btrfs/inode-map.c8
-rw-r--r--fs/btrfs/inode.c369
-rw-r--r--fs/btrfs/ioctl.c298
-rw-r--r--fs/btrfs/locking.c2
-rw-r--r--fs/btrfs/raid56.c102
-rw-r--r--fs/btrfs/relocation.c19
-rw-r--r--fs/btrfs/scrub.c24
-rw-r--r--fs/btrfs/send.h4
-rw-r--r--fs/btrfs/super.c93
-rw-r--r--fs/btrfs/sysfs.c35
-rw-r--r--fs/btrfs/sysfs.h5
-rw-r--r--fs/btrfs/tests/btrfs-tests.c64
-rw-r--r--fs/btrfs/tests/btrfs-tests.h10
-rw-r--r--fs/btrfs/tests/extent-io-tests.c159
-rw-r--r--fs/btrfs/tests/free-space-tests.c239
-rw-r--r--fs/btrfs/tests/free-space-tree-tests.c571
-rw-r--r--fs/btrfs/tests/inode-tests.c10
-rw-r--r--fs/btrfs/tests/qgroup-tests.c20
-rw-r--r--fs/btrfs/transaction.c50
-rw-r--r--fs/btrfs/transaction.h2
-rw-r--r--fs/btrfs/tree-defrag.c27
-rw-r--r--fs/btrfs/tree-log.c14
-rw-r--r--fs/btrfs/volumes.c169
-rw-r--r--fs/btrfs/volumes.h3
-rw-r--r--fs/btrfs/xattr.c172
-rw-r--r--fs/btrfs/xattr.h2
-rw-r--r--fs/buffer.c21
-rw-r--r--fs/cachefiles/daemon.c12
-rw-r--r--fs/cachefiles/interface.c4
-rw-r--r--fs/cachefiles/namei.c40
-rw-r--r--fs/ceph/acl.c16
-rw-r--r--fs/ceph/addr.c18
-rw-r--r--fs/ceph/cache.c12
-rw-r--r--fs/ceph/caps.c31
-rw-r--r--fs/ceph/dir.c4
-rw-r--r--fs/ceph/export.c4
-rw-r--r--fs/ceph/file.c527
-rw-r--r--fs/ceph/inode.c12
-rw-r--r--fs/ceph/mds_client.c16
-rw-r--r--fs/ceph/mds_client.h1
-rw-r--r--fs/ceph/super.c4
-rw-r--r--fs/ceph/super.h1
-rw-r--r--fs/cifs/cifs_dfs_ref.c2
-rw-r--r--fs/cifs/cifsfs.c86
-rw-r--r--fs/cifs/cifsfs.h6
-rw-r--r--fs/cifs/cifsglob.h16
-rw-r--r--fs/cifs/cifsproto.h5
-rw-r--r--fs/cifs/connect.c37
-rw-r--r--fs/cifs/file.c20
-rw-r--r--fs/cifs/inode.c24
-rw-r--r--fs/cifs/ioctl.c126
-rw-r--r--fs/cifs/link.c10
-rw-r--r--fs/cifs/misc.c2
-rw-r--r--fs/cifs/smb2misc.c36
-rw-r--r--fs/cifs/smb2ops.c13
-rw-r--r--fs/cifs/smb2pdu.c10
-rw-r--r--fs/cifs/smb2pdu.h8
-rw-r--r--fs/cifs/smb2proto.h3
-rw-r--r--fs/cifs/smb2transport.c102
-rw-r--r--fs/cifs/xattr.c16
-rw-r--r--fs/coda/cnode.c5
-rw-r--r--fs/coda/coda_linux.h3
-rw-r--r--fs/coda/dir.c4
-rw-r--r--fs/coda/file.c8
-rw-r--r--fs/coda/inode.c6
-rw-r--r--fs/coda/symlink.c4
-rw-r--r--fs/compat.c21
-rw-r--r--fs/compat_ioctl.c272
-rw-r--r--fs/configfs/configfs_internal.h14
-rw-r--r--fs/configfs/dir.c222
-rw-r--r--fs/configfs/file.c259
-rw-r--r--fs/configfs/inode.c6
-rw-r--r--fs/configfs/symlink.c22
-rw-r--r--fs/coredump.c28
-rw-r--r--fs/cramfs/inode.c1
-rw-r--r--fs/dax.c610
-rw-r--r--fs/dcache.c28
-rw-r--r--fs/debugfs/inode.c22
-rw-r--r--fs/devpts/inode.c12
-rw-r--r--fs/direct-io.c8
-rw-r--r--fs/dlm/user.c11
-rw-r--r--fs/drop_caches.c7
-rw-r--r--fs/ecryptfs/inode.c53
-rw-r--r--fs/ecryptfs/main.c6
-rw-r--r--fs/ecryptfs/mmap.c4
-rw-r--r--fs/efivarfs/file.c8
-rw-r--r--fs/efivarfs/super.c4
-rw-r--r--fs/efs/inode.c1
-rw-r--r--fs/efs/super.c6
-rw-r--r--fs/efs/symlink.c4
-rw-r--r--fs/eventfd.c4
-rw-r--r--fs/eventpoll.c54
-rw-r--r--fs/exec.c9
-rw-r--r--fs/exofs/file.c4
-rw-r--r--fs/exofs/inode.c1
-rw-r--r--fs/exofs/namei.c1
-rw-r--r--fs/exofs/super.c4
-rw-r--r--fs/exportfs/expfs.c12
-rw-r--r--fs/ext2/file.c23
-rw-r--r--fs/ext2/inode.c17
-rw-r--r--fs/ext2/ioctl.c12
-rw-r--r--fs/ext2/namei.c1
-rw-r--r--fs/ext2/super.c2
-rw-r--r--fs/ext2/symlink.c5
-rw-r--r--fs/ext2/xattr.c21
-rw-r--r--fs/ext2/xattr_security.c21
-rw-r--r--fs/ext2/xattr_trusted.c23
-rw-r--r--fs/ext2/xattr_user.c23
-rw-r--r--fs/ext4/crypto.c62
-rw-r--r--fs/ext4/dir.c13
-rw-r--r--fs/ext4/ext4.h102
-rw-r--r--fs/ext4/extents.c177
-rw-r--r--fs/ext4/file.c100
-rw-r--r--fs/ext4/ialloc.c7
-rw-r--r--fs/ext4/inline.c10
-rw-r--r--fs/ext4/inode.c325
-rw-r--r--fs/ext4/ioctl.c393
-rw-r--r--fs/ext4/mballoc.c2
-rw-r--r--fs/ext4/move_extent.c1
-rw-r--r--fs/ext4/namei.c65
-rw-r--r--fs/ext4/page-io.c5
-rw-r--r--fs/ext4/super.c103
-rw-r--r--fs/ext4/symlink.c29
-rw-r--r--fs/ext4/truncate.h2
-rw-r--r--fs/ext4/xattr.c23
-rw-r--r--fs/ext4/xattr_security.c22
-rw-r--r--fs/ext4/xattr_trusted.c23
-rw-r--r--fs/ext4/xattr_user.c23
-rw-r--r--fs/f2fs/checkpoint.c177
-rw-r--r--fs/f2fs/data.c381
-rw-r--r--fs/f2fs/debug.c35
-rw-r--r--fs/f2fs/dir.c38
-rw-r--r--fs/f2fs/extent_cache.c122
-rw-r--r--fs/f2fs/f2fs.h148
-rw-r--r--fs/f2fs/file.c360
-rw-r--r--fs/f2fs/gc.c9
-rw-r--r--fs/f2fs/gc.h8
-rw-r--r--fs/f2fs/inline.c9
-rw-r--r--fs/f2fs/inode.c29
-rw-r--r--fs/f2fs/namei.c90
-rw-r--r--fs/f2fs/node.c170
-rw-r--r--fs/f2fs/node.h6
-rw-r--r--fs/f2fs/recovery.c40
-rw-r--r--fs/f2fs/segment.c122
-rw-r--r--fs/f2fs/shrinker.c3
-rw-r--r--fs/f2fs/super.c258
-rw-r--r--fs/f2fs/xattr.c95
-rw-r--r--fs/f2fs/xattr.h2
-rw-r--r--fs/fat/cache.c79
-rw-r--r--fs/fat/dir.c6
-rw-r--r--fs/fat/fat.h8
-rw-r--r--fs/fat/fatent.c24
-rw-r--r--fs/fat/file.c69
-rw-r--r--fs/fat/inode.c106
-rw-r--r--fs/fcntl.c8
-rw-r--r--fs/file.c13
-rw-r--r--fs/file_table.c4
-rw-r--r--fs/filesystems.c6
-rw-r--r--fs/freevxfs/vxfs_inode.c1
-rw-r--r--fs/fs-writeback.c2
-rw-r--r--fs/fuse/dir.c27
-rw-r--r--fs/fuse/file.c101
-rw-r--r--fs/fuse/fuse_i.h3
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/gfs2/acl.c4
-rw-r--r--fs/gfs2/acl.h2
-rw-r--r--fs/gfs2/aops.c2
-rw-r--r--fs/gfs2/bmap.c13
-rw-r--r--fs/gfs2/dir.c175
-rw-r--r--fs/gfs2/file.c38
-rw-r--r--fs/gfs2/glock.c18
-rw-r--r--fs/gfs2/glock.h26
-rw-r--r--fs/gfs2/glops.c2
-rw-r--r--fs/gfs2/incore.h23
-rw-r--r--fs/gfs2/inode.c71
-rw-r--r--fs/gfs2/log.c3
-rw-r--r--fs/gfs2/main.c19
-rw-r--r--fs/gfs2/meta_io.c82
-rw-r--r--fs/gfs2/meta_io.h2
-rw-r--r--fs/gfs2/ops_fstype.c10
-rw-r--r--fs/gfs2/quota.c125
-rw-r--r--fs/gfs2/quota.h2
-rw-r--r--fs/gfs2/rgrp.c57
-rw-r--r--fs/gfs2/rgrp.h6
-rw-r--r--fs/gfs2/super.c43
-rw-r--r--fs/gfs2/util.c2
-rw-r--r--fs/gfs2/util.h2
-rw-r--r--fs/gfs2/xattr.c60
-rw-r--r--fs/gfs2/xattr.h1
-rw-r--r--fs/hfs/catalog.c6
-rw-r--r--fs/hfs/dir.c4
-rw-r--r--fs/hfs/inode.c8
-rw-r--r--fs/hfs/mdb.c4
-rw-r--r--fs/hfs/super.c4
-rw-r--r--fs/hfsplus/dir.c4
-rw-r--r--fs/hfsplus/inode.c10
-rw-r--r--fs/hfsplus/ioctl.c4
-rw-r--r--fs/hfsplus/posix_acl.c8
-rw-r--r--fs/hfsplus/super.c2
-rw-r--r--fs/hfsplus/xattr.c12
-rw-r--r--fs/hostfs/hostfs_kern.c28
-rw-r--r--fs/hpfs/dir.c6
-rw-r--r--fs/hpfs/inode.c1
-rw-r--r--fs/hpfs/map.c2
-rw-r--r--fs/hpfs/namei.c5
-rw-r--r--fs/hpfs/super.c2
-rw-r--r--fs/hugetlbfs/inode.c190
-rw-r--r--fs/inode.c26
-rw-r--r--fs/internal.h9
-rw-r--r--fs/ioctl.c75
-rw-r--r--fs/isofs/inode.c3
-rw-r--r--fs/isofs/rock.c4
-rw-r--r--fs/jbd2/transaction.c6
-rw-r--r--fs/jffs2/build.c8
-rw-r--r--fs/jffs2/dir.c11
-rw-r--r--fs/jffs2/file.c4
-rw-r--r--fs/jffs2/fs.c5
-rw-r--r--fs/jffs2/security.c22
-rw-r--r--fs/jffs2/super.c7
-rw-r--r--fs/jffs2/symlink.c2
-rw-r--r--fs/jffs2/wbuf.c2
-rw-r--r--fs/jffs2/xattr.c26
-rw-r--r--fs/jffs2/xattr_trusted.c21
-rw-r--r--fs/jffs2/xattr_user.c20
-rw-r--r--fs/jfs/acl.c8
-rw-r--r--fs/jfs/file.c6
-rw-r--r--fs/jfs/inode.c1
-rw-r--r--fs/jfs/ioctl.c6
-rw-r--r--fs/jfs/jfs_logmgr.c9
-rw-r--r--fs/jfs/namei.c1
-rw-r--r--fs/jfs/super.c8
-rw-r--r--fs/jfs/symlink.c5
-rw-r--r--fs/kernfs/dir.c59
-rw-r--r--fs/kernfs/inode.c4
-rw-r--r--fs/kernfs/symlink.c24
-rw-r--r--fs/libfs.c32
-rw-r--r--fs/lockd/svc.c79
-rw-r--r--fs/locks.c119
-rw-r--r--fs/logfs/Kconfig2
-rw-r--r--fs/logfs/dir.c9
-rw-r--r--fs/logfs/file.c8
-rw-r--r--fs/logfs/inode.c6
-rw-r--r--fs/logfs/logfs.h7
-rw-r--r--fs/logfs/readwrite.c4
-rw-r--r--fs/logfs/segment.c2
-rw-r--r--fs/minix/inode.c6
-rw-r--r--fs/minix/itree_v1.c9
-rw-r--r--fs/minix/itree_v2.c9
-rw-r--r--fs/namei.c396
-rw-r--r--fs/namespace.c61
-rw-r--r--fs/ncpfs/dir.c10
-rw-r--r--fs/ncpfs/file.c4
-rw-r--r--fs/ncpfs/inode.c6
-rw-r--r--fs/nfs/blocklayout/extent_tree.c10
-rw-r--r--fs/nfs/callback_proc.c52
-rw-r--r--fs/nfs/dir.c34
-rw-r--r--fs/nfs/direct.c60
-rw-r--r--fs/nfs/file.c10
-rw-r--r--fs/nfs/filelayout/filelayout.c20
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c205
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.h1
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c117
-rw-r--r--fs/nfs/inode.c60
-rw-r--r--fs/nfs/internal.h41
-rw-r--r--fs/nfs/nfs3acl.c4
-rw-r--r--fs/nfs/nfs42proc.c156
-rw-r--r--fs/nfs/nfs4file.c111
-rw-r--r--fs/nfs/nfs4proc.c143
-rw-r--r--fs/nfs/nfs4sysctl.c2
-rw-r--r--fs/nfs/nfs4trace.c1
-rw-r--r--fs/nfs/nfs4trace.h431
-rw-r--r--fs/nfs/nfstrace.h1
-rw-r--r--fs/nfs/pagelist.c126
-rw-r--r--fs/nfs/pnfs.c345
-rw-r--r--fs/nfs/pnfs.h58
-rw-r--r--fs/nfs/pnfs_nfs.c10
-rw-r--r--fs/nfs/read.c43
-rw-r--r--fs/nfs/symlink.c39
-rw-r--r--fs/nfs/write.c140
-rw-r--r--fs/nfsd/lockd.c2
-rw-r--r--fs/nfsd/netns.h2
-rw-r--r--fs/nfsd/nfs3xdr.c2
-rw-r--r--fs/nfsd/nfs4callback.c6
-rw-r--r--fs/nfsd/nfs4layouts.c39
-rw-r--r--fs/nfsd/nfs4proc.c67
-rw-r--r--fs/nfsd/nfs4recover.c18
-rw-r--r--fs/nfsd/nfs4state.c70
-rw-r--r--fs/nfsd/nfs4xdr.c29
-rw-r--r--fs/nfsd/nfsfh.h27
-rw-r--r--fs/nfsd/nfssvc.c75
-rw-r--r--fs/nfsd/state.h8
-rw-r--r--fs/nfsd/trace.h41
-rw-r--r--fs/nfsd/vfs.c50
-rw-r--r--fs/nfsd/vfs.h2
-rw-r--r--fs/nfsd/xdr4.h10
-rw-r--r--fs/nilfs2/inode.c5
-rw-r--r--fs/nilfs2/ioctl.c4
-rw-r--r--fs/nilfs2/namei.c4
-rw-r--r--fs/nilfs2/super.c9
-rw-r--r--fs/notify/group.c4
-rw-r--r--fs/notify/inode_mark.c3
-rw-r--r--fs/notify/mark.c53
-rw-r--r--fs/ntfs/dir.c4
-rw-r--r--fs/ntfs/file.c8
-rw-r--r--fs/ntfs/quota.c6
-rw-r--r--fs/ntfs/super.c16
-rw-r--r--fs/ocfs2/alloc.c47
-rw-r--r--fs/ocfs2/alloc.h2
-rw-r--r--fs/ocfs2/aops.c4
-rw-r--r--fs/ocfs2/cluster/heartbeat.c18
-rw-r--r--fs/ocfs2/cluster/nodemanager.c2
-rw-r--r--fs/ocfs2/dir.c4
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h11
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c9
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c15
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c2
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c2
-rw-r--r--fs/ocfs2/dlmglue.c8
-rw-r--r--fs/ocfs2/file.c20
-rw-r--r--fs/ocfs2/inode.c13
-rw-r--r--fs/ocfs2/ioctl.c16
-rw-r--r--fs/ocfs2/journal.c18
-rw-r--r--fs/ocfs2/localalloc.c26
-rw-r--r--fs/ocfs2/mmap.c4
-rw-r--r--fs/ocfs2/move_extents.c16
-rw-r--r--fs/ocfs2/namei.c50
-rw-r--r--fs/ocfs2/quota.h2
-rw-r--r--fs/ocfs2/quota_global.c6
-rw-r--r--fs/ocfs2/refcounttree.c12
-rw-r--r--fs/ocfs2/resize.c8
-rw-r--r--fs/ocfs2/slot_map.c14
-rw-r--r--fs/ocfs2/suballoc.c12
-rw-r--r--fs/ocfs2/super.c13
-rw-r--r--fs/ocfs2/symlink.c3
-rw-r--r--fs/ocfs2/xattr.c182
-rw-r--r--fs/open.c19
-rw-r--r--fs/openpromfs/inode.c2
-rw-r--r--fs/overlayfs/copy_up.c4
-rw-r--r--fs/overlayfs/dir.c22
-rw-r--r--fs/overlayfs/inode.c82
-rw-r--r--fs/overlayfs/overlayfs.h3
-rw-r--r--fs/overlayfs/readdir.c20
-rw-r--r--fs/overlayfs/super.c58
-rw-r--r--fs/pipe.c47
-rw-r--r--fs/pnode.c9
-rw-r--r--fs/posix_acl.c25
-rw-r--r--fs/proc/base.c56
-rw-r--r--fs/proc/fd.c1
-rw-r--r--fs/proc/inode.c24
-rw-r--r--fs/proc/kcore.c4
-rw-r--r--fs/proc/meminfo.c5
-rw-r--r--fs/proc/namespaces.c10
-rw-r--r--fs/proc/nommu.c5
-rw-r--r--fs/proc/page.c4
-rw-r--r--fs/proc/self.c22
-rw-r--r--fs/proc/task_mmu.c210
-rw-r--r--fs/proc/task_nommu.c54
-rw-r--r--fs/proc/thread_self.c23
-rw-r--r--fs/proc_namespace.c27
-rw-r--r--fs/pstore/inode.c6
-rw-r--r--fs/qnx4/inode.c3
-rw-r--r--fs/qnx6/inode.c3
-rw-r--r--fs/quota/dquot.c22
-rw-r--r--fs/quota/netlink.c5
-rw-r--r--fs/quota/quota_v2.c4
-rw-r--r--fs/ramfs/inode.c1
-rw-r--r--fs/read_write.c375
-rw-r--r--fs/readdir.c2
-rw-r--r--fs/reiserfs/dir.c4
-rw-r--r--fs/reiserfs/file.c4
-rw-r--r--fs/reiserfs/inode.c1
-rw-r--r--fs/reiserfs/ioctl.c2
-rw-r--r--fs/reiserfs/journal.c24
-rw-r--r--fs/reiserfs/namei.c4
-rw-r--r--fs/reiserfs/prints.c9
-rw-r--r--fs/reiserfs/procfs.c5
-rw-r--r--fs/reiserfs/super.c5
-rw-r--r--fs/reiserfs/xattr.c80
-rw-r--r--fs/reiserfs/xattr_acl.c8
-rw-r--r--fs/reiserfs/xattr_security.c16
-rw-r--r--fs/reiserfs/xattr_trusted.c15
-rw-r--r--fs/reiserfs/xattr_user.c14
-rw-r--r--fs/romfs/super.c5
-rw-r--r--fs/select.c6
-rw-r--r--fs/splice.c20
-rw-r--r--fs/squashfs/inode.c2
-rw-r--r--fs/squashfs/super.c10
-rw-r--r--fs/squashfs/symlink.c3
-rw-r--r--fs/squashfs/xattr.c38
-rw-r--r--fs/stat.c2
-rw-r--r--fs/super.c8
-rw-r--r--fs/sysv/inode.c6
-rw-r--r--fs/tracefs/inode.c34
-rw-r--r--fs/ubifs/dir.c18
-rw-r--r--fs/ubifs/file.c6
-rw-r--r--fs/ubifs/key.h6
-rw-r--r--fs/ubifs/super.c4
-rw-r--r--fs/ubifs/xattr.c8
-rw-r--r--fs/udf/balloc.c98
-rw-r--r--fs/udf/file.c10
-rw-r--r--fs/udf/inode.c233
-rw-r--r--fs/udf/namei.c8
-rw-r--r--fs/udf/super.c24
-rw-r--r--fs/udf/symlink.c4
-rw-r--r--fs/udf/udfdecl.h5
-rw-r--r--fs/ufs/Makefile2
-rw-r--r--fs/ufs/inode.c5
-rw-r--r--fs/ufs/namei.c5
-rw-r--r--fs/ufs/super.c2
-rw-r--r--fs/ufs/symlink.c42
-rw-r--r--fs/ufs/ufs.h4
-rw-r--r--fs/userfaultfd.c6
-rw-r--r--fs/utimes.c4
-rw-r--r--fs/xattr.c212
-rw-r--r--fs/xfs/kmem.h1
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c4
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h1
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c35
-rw-r--r--fs/xfs/libxfs/xfs_attr.c141
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c1
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c32
-rw-r--r--fs/xfs/libxfs/xfs_bit.c6
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c43
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h2
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c1
-rw-r--r--fs/xfs/libxfs/xfs_btree.c58
-rw-r--r--fs/xfs/libxfs/xfs_btree.h3
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c1
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c1
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c1
-rw-r--r--fs/xfs/libxfs/xfs_dquot_buf.c37
-rw-r--r--fs/xfs/libxfs/xfs_format.h11
-rw-r--r--fs/xfs/libxfs/xfs_fs.h38
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c1
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c27
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c4
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h1
-rw-r--r--fs/xfs/libxfs/xfs_quota_defs.h2
-rw-r--r--fs/xfs/libxfs/xfs_sb.c2
-rw-r--r--fs/xfs/libxfs/xfs_shared.h1
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c1
-rw-r--r--fs/xfs/xfs_acl.c23
-rw-r--r--fs/xfs/xfs_acl.h4
-rw-r--r--fs/xfs/xfs_aops.c8
-rw-r--r--fs/xfs/xfs_aops.h1
-rw-r--r--fs/xfs/xfs_bmap_util.c46
-rw-r--r--fs/xfs/xfs_buf.c10
-rw-r--r--fs/xfs/xfs_buf.h1
-rw-r--r--fs/xfs/xfs_dquot.c13
-rw-r--r--fs/xfs/xfs_error.c4
-rw-r--r--fs/xfs/xfs_file.c38
-rw-r--r--fs/xfs/xfs_inode.c85
-rw-r--r--fs/xfs/xfs_ioctl.c92
-rw-r--r--fs/xfs/xfs_iomap.c21
-rw-r--r--fs/xfs/xfs_iops.c18
-rw-r--r--fs/xfs/xfs_log.c51
-rw-r--r--fs/xfs/xfs_log_priv.h3
-rw-r--r--fs/xfs/xfs_log_recover.c760
-rw-r--r--fs/xfs/xfs_pnfs.c4
-rw-r--r--fs/xfs/xfs_rtalloc.c3
-rw-r--r--fs/xfs/xfs_super.c6
-rw-r--r--fs/xfs/xfs_symlink.c12
-rw-r--r--fs/xfs/xfs_sysfs.c36
-rw-r--r--fs/xfs/xfs_trace.h26
-rw-r--r--fs/xfs/xfs_trans_dquot.c14
-rw-r--r--fs/xfs/xfs_xattr.c143
600 files changed, 15088 insertions, 39036 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index a7e28890f..9da967f38 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -67,8 +67,8 @@ int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
return 0;
}
/* get the default/access acl values and cache them */
- dacl = __v9fs_get_acl(fid, POSIX_ACL_XATTR_DEFAULT);
- pacl = __v9fs_get_acl(fid, POSIX_ACL_XATTR_ACCESS);
+ dacl = __v9fs_get_acl(fid, XATTR_NAME_POSIX_ACL_DEFAULT);
+ pacl = __v9fs_get_acl(fid, XATTR_NAME_POSIX_ACL_ACCESS);
if (!IS_ERR(dacl) && !IS_ERR(pacl)) {
set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl);
@@ -133,10 +133,10 @@ static int v9fs_set_acl(struct p9_fid *fid, int type, struct posix_acl *acl)
goto err_free_out;
switch (type) {
case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
- name = POSIX_ACL_XATTR_DEFAULT;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
BUG();
@@ -220,15 +220,12 @@ static int v9fs_xattr_get_acl(const struct xattr_handler *handler,
struct posix_acl *acl;
int error;
- if (strcmp(name, "") != 0)
- return -EINVAL;
-
v9ses = v9fs_dentry2v9ses(dentry);
/*
* We allow set/get/list of acl when access=client is not specified
*/
if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
- return v9fs_xattr_get(dentry, handler->prefix, buffer, size);
+ return v9fs_xattr_get(dentry, handler->name, buffer, size);
acl = v9fs_get_cached_acl(d_inode(dentry), handler->flags);
if (IS_ERR(acl))
@@ -250,16 +247,13 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
struct v9fs_session_info *v9ses;
struct inode *inode = d_inode(dentry);
- if (strcmp(name, "") != 0)
- return -EINVAL;
-
v9ses = v9fs_dentry2v9ses(dentry);
/*
* set the attribute on the remote. Without even looking at the
* xattr value. We leave it to the server to validate
*/
if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
- return v9fs_xattr_set(dentry, handler->prefix, value, size,
+ return v9fs_xattr_set(dentry, handler->name, value, size,
flags);
if (S_ISLNK(inode->i_mode))
@@ -319,7 +313,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
default:
BUG();
}
- retval = v9fs_xattr_set(dentry, handler->prefix, value, size, flags);
+ retval = v9fs_xattr_set(dentry, handler->name, value, size, flags);
if (!retval)
set_cached_acl(inode, handler->flags, acl);
err_out:
@@ -328,14 +322,14 @@ err_out:
}
const struct xattr_handler v9fs_xattr_acl_access_handler = {
- .prefix = POSIX_ACL_XATTR_ACCESS,
+ .name = XATTR_NAME_POSIX_ACL_ACCESS,
.flags = ACL_TYPE_ACCESS,
.get = v9fs_xattr_get_acl,
.set = v9fs_xattr_set_acl,
};
const struct xattr_handler v9fs_xattr_acl_default_handler = {
- .prefix = POSIX_ACL_XATTR_DEFAULT,
+ .name = XATTR_NAME_POSIX_ACL_DEFAULT,
.flags = ACL_TYPE_DEFAULT,
.get = v9fs_xattr_get_acl,
.set = v9fs_xattr_set_acl,
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index a69260f27..103ca5e12 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -243,14 +243,14 @@ void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp)
if (!v9inode->fscache)
return;
- spin_lock(&v9inode->fscache_lock);
+ mutex_lock(&v9inode->fscache_lock);
if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
v9fs_cache_inode_flush_cookie(inode);
else
v9fs_cache_inode_get_cookie(inode);
- spin_unlock(&v9inode->fscache_lock);
+ mutex_unlock(&v9inode->fscache_lock);
}
void v9fs_cache_inode_reset_cookie(struct inode *inode)
@@ -264,7 +264,7 @@ void v9fs_cache_inode_reset_cookie(struct inode *inode)
old = v9inode->fscache;
- spin_lock(&v9inode->fscache_lock);
+ mutex_lock(&v9inode->fscache_lock);
fscache_relinquish_cookie(v9inode->fscache, 1);
v9ses = v9fs_inode2v9ses(inode);
@@ -274,7 +274,7 @@ void v9fs_cache_inode_reset_cookie(struct inode *inode)
p9_debug(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p\n",
inode, old, v9inode->fscache);
- spin_unlock(&v9inode->fscache_lock);
+ mutex_unlock(&v9inode->fscache_lock);
}
int __v9fs_fscache_release_page(struct page *page, gfp_t gfp)
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 6caca0250..072e75995 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -575,7 +575,7 @@ static int v9fs_init_inode_cache(void)
v9fs_inode_cache = kmem_cache_create("v9fs_inode_cache",
sizeof(struct v9fs_inode),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
v9fs_inode_init_once);
if (!v9fs_inode_cache)
return -ENOMEM;
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 0923f2cf3..687705038 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -123,7 +123,7 @@ struct v9fs_session_info {
struct v9fs_inode {
#ifdef CONFIG_9P_FSCACHE
- spinlock_t fscache_lock;
+ struct mutex fscache_lock;
struct fscache_cookie *fscache;
#endif
struct p9_qid qid;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 7bf835f85..eadc894fa 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -449,14 +449,14 @@ static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end,
if (retval)
return retval;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
p9_debug(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
fid = filp->private_data;
v9fs_blank_wstat(&wstat);
retval = p9_client_wstat(fid, &wstat);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return retval;
}
@@ -472,13 +472,13 @@ int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
if (retval)
return retval;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
p9_debug(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
fid = filp->private_data;
retval = p9_client_fsync(fid, datasync);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return retval;
}
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 511078586..3a08b3e6f 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -244,7 +244,7 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
return NULL;
#ifdef CONFIG_9P_FSCACHE
v9inode->fscache = NULL;
- spin_lock_init(&v9inode->fscache_lock);
+ mutex_init(&v9inode->fscache_lock);
#endif
v9inode->writeback_fid = NULL;
v9inode->cache_validity = 0;
@@ -1223,18 +1223,26 @@ ino_t v9fs_qid2ino(struct p9_qid *qid)
}
/**
- * v9fs_vfs_follow_link - follow a symlink path
+ * v9fs_vfs_get_link - follow a symlink path
* @dentry: dentry for symlink
- * @cookie: place to pass the data to put_link()
+ * @inode: inode for symlink
+ * @done: delayed call for when we are done with the return value
*/
-static const char *v9fs_vfs_follow_link(struct dentry *dentry, void **cookie)
+static const char *v9fs_vfs_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct v9fs_session_info *v9ses = v9fs_dentry2v9ses(dentry);
- struct p9_fid *fid = v9fs_fid_lookup(dentry);
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid;
struct p9_wstat *st;
char *res;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
+ v9ses = v9fs_dentry2v9ses(dentry);
+ fid = v9fs_fid_lookup(dentry);
p9_debug(P9_DEBUG_VFS, "%pd\n", dentry);
if (IS_ERR(fid))
@@ -1259,7 +1267,8 @@ static const char *v9fs_vfs_follow_link(struct dentry *dentry, void **cookie)
p9stat_free(st);
kfree(st);
- return *cookie = res;
+ set_delayed_call(done, kfree_link, res);
+ return res;
}
/**
@@ -1452,8 +1461,7 @@ static const struct inode_operations v9fs_file_inode_operations = {
static const struct inode_operations v9fs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = v9fs_vfs_follow_link,
- .put_link = kfree_put_link,
+ .get_link = v9fs_vfs_get_link,
.getattr = v9fs_vfs_getattr,
.setattr = v9fs_vfs_setattr,
};
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index cb899af1b..a34702c99 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -899,26 +899,34 @@ error:
}
/**
- * v9fs_vfs_follow_link_dotl - follow a symlink path
+ * v9fs_vfs_get_link_dotl - follow a symlink path
* @dentry: dentry for symlink
- * @cookie: place to pass the data to put_link()
+ * @inode: inode for symlink
+ * @done: destructor for return value
*/
static const char *
-v9fs_vfs_follow_link_dotl(struct dentry *dentry, void **cookie)
+v9fs_vfs_get_link_dotl(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct p9_fid *fid = v9fs_fid_lookup(dentry);
+ struct p9_fid *fid;
char *target;
int retval;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
p9_debug(P9_DEBUG_VFS, "%pd\n", dentry);
+ fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
return ERR_CAST(fid);
retval = p9_client_readlink(fid, &target);
if (retval)
return ERR_PTR(retval);
- return *cookie = target;
+ set_delayed_call(done, kfree_link, target);
+ return target;
}
int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
@@ -984,8 +992,7 @@ const struct inode_operations v9fs_file_inode_operations_dotl = {
const struct inode_operations v9fs_symlink_inode_operations_dotl = {
.readlink = generic_readlink,
- .follow_link = v9fs_vfs_follow_link_dotl,
- .put_link = kfree_put_link,
+ .get_link = v9fs_vfs_get_link_dotl,
.getattr = v9fs_vfs_getattr_dotl,
.setattr = v9fs_vfs_setattr_dotl,
.setxattr = generic_setxattr,
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index e3d026ac3..9dd9b47a6 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -143,8 +143,6 @@ static int v9fs_xattr_handler_get(const struct xattr_handler *handler,
{
const char *full_name = xattr_full_name(handler, name);
- if (strcmp(name, "") == 0)
- return -EINVAL;
return v9fs_xattr_get(dentry, full_name, buffer, size);
}
@@ -154,8 +152,6 @@ static int v9fs_xattr_handler_set(const struct xattr_handler *handler,
{
const char *full_name = xattr_full_name(handler, name);
- if (strcmp(name, "") == 0)
- return -EINVAL;
return v9fs_xattr_set(dentry, full_name, value, size, flags);
}
diff --git a/fs/Kconfig b/fs/Kconfig
index bec7b6beb..dc04844aa 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -50,7 +50,8 @@ config FS_DAX_PMD
bool
default FS_DAX
depends on FS_DAX
- depends on BROKEN
+ depends on ZONE_DEVICE
+ depends on TRANSPARENT_HUGEPAGE
endif # BLOCK
@@ -73,6 +74,16 @@ config FILE_LOCKING
for filesystems like NFS and for the flock() system
call. Disabling this option saves about 11k.
+config MANDATORY_FILE_LOCKING
+ bool "Enable Mandatory file locking"
+ depends on FILE_LOCKING
+ default y
+ help
+ This option enables appropriately marked files on appropriately
+ mounted filesystems to support mandatory locking.
+
+ To the best of my knowledge this is dead code that no one cares about.
+
source "fs/notify/Kconfig"
source "fs/quota/Kconfig"
@@ -222,7 +233,6 @@ source "fs/pstore/Kconfig"
source "fs/sysv/Kconfig"
source "fs/ufs/Kconfig"
source "fs/exofs/Kconfig"
-source "fs/aufs/Kconfig"
endif # MISC_FILESYSTEMS
diff --git a/fs/Makefile b/fs/Makefile
index 0c61756a2..b8da1c2c3 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -127,4 +127,3 @@ obj-y += exofs/ # Multiple modules
obj-$(CONFIG_CEPH_FS) += ceph/
obj-$(CONFIG_PSTORE) += pstore/
obj-$(CONFIG_EFIVAR_FS) += efivarfs/
-obj-$(CONFIG_AUFS_FS) += aufs/
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 24575d9d8..fadf408bd 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -44,24 +44,24 @@ struct adfs_dir_ops;
*/
struct adfs_sb_info {
union { struct {
- struct adfs_discmap *s_map; /* bh list containing map */
- struct adfs_dir_ops *s_dir; /* directory operations */
+ struct adfs_discmap *s_map; /* bh list containing map */
+ const struct adfs_dir_ops *s_dir; /* directory operations */
};
- struct rcu_head rcu; /* used only at shutdown time */
+ struct rcu_head rcu; /* used only at shutdown time */
};
- kuid_t s_uid; /* owner uid */
- kgid_t s_gid; /* owner gid */
- umode_t s_owner_mask; /* ADFS owner perm -> unix perm */
- umode_t s_other_mask; /* ADFS other perm -> unix perm */
+ kuid_t s_uid; /* owner uid */
+ kgid_t s_gid; /* owner gid */
+ umode_t s_owner_mask; /* ADFS owner perm -> unix perm */
+ umode_t s_other_mask; /* ADFS other perm -> unix perm */
int s_ftsuffix; /* ,xyz hex filetype suffix option */
- __u32 s_ids_per_zone; /* max. no ids in one zone */
- __u32 s_idlen; /* length of ID in map */
- __u32 s_map_size; /* sector size of a map */
- unsigned long s_size; /* total size (in blocks) of this fs */
- signed int s_map2blk; /* shift left by this for map->sector */
- unsigned int s_log2sharesize;/* log2 share size */
- __le32 s_version; /* disc format version */
+ __u32 s_ids_per_zone; /* max. no ids in one zone */
+ __u32 s_idlen; /* length of ID in map */
+ __u32 s_map_size; /* sector size of a map */
+ unsigned long s_size; /* total size (in blocks) of this fs */
+ signed int s_map2blk; /* shift left by this for map->sector*/
+ unsigned int s_log2sharesize;/* log2 share size */
+ __le32 s_version; /* disc format version */
unsigned int s_namelen; /* maximum number of characters in name */
};
@@ -168,8 +168,8 @@ void __adfs_error(struct super_block *sb, const char *function,
extern const struct inode_operations adfs_dir_inode_operations;
extern const struct file_operations adfs_dir_operations;
extern const struct dentry_operations adfs_dentry_operations;
-extern struct adfs_dir_ops adfs_f_dir_ops;
-extern struct adfs_dir_ops adfs_fplus_dir_ops;
+extern const struct adfs_dir_ops adfs_f_dir_ops;
+extern const struct adfs_dir_ops adfs_fplus_dir_ops;
extern int adfs_dir_update(struct super_block *sb, struct object_info *obj,
int wait);
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 51c279a29..fd4cf2c48 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -21,7 +21,7 @@ adfs_readdir(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
- struct adfs_dir_ops *ops = ADFS_SB(sb)->s_dir;
+ const struct adfs_dir_ops *ops = ADFS_SB(sb)->s_dir;
struct object_info obj;
struct adfs_dir dir;
int ret = 0;
@@ -69,7 +69,7 @@ adfs_dir_update(struct super_block *sb, struct object_info *obj, int wait)
{
int ret = -EINVAL;
#ifdef CONFIG_ADFS_FS_RW
- struct adfs_dir_ops *ops = ADFS_SB(sb)->s_dir;
+ const struct adfs_dir_ops *ops = ADFS_SB(sb)->s_dir;
struct adfs_dir dir;
printk(KERN_INFO "adfs_dir_update: object %06X in dir %06X\n",
@@ -129,7 +129,7 @@ static int
adfs_dir_lookup_byname(struct inode *inode, struct qstr *name, struct object_info *obj)
{
struct super_block *sb = inode->i_sb;
- struct adfs_dir_ops *ops = ADFS_SB(sb)->s_dir;
+ const struct adfs_dir_ops *ops = ADFS_SB(sb)->s_dir;
struct adfs_dir dir;
int ret;
diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c
index 4bbe853ee..0fbfd0b04 100644
--- a/fs/adfs/dir_f.c
+++ b/fs/adfs/dir_f.c
@@ -476,7 +476,7 @@ adfs_f_free(struct adfs_dir *dir)
dir->sb = NULL;
}
-struct adfs_dir_ops adfs_f_dir_ops = {
+const struct adfs_dir_ops adfs_f_dir_ops = {
.read = adfs_f_read,
.setpos = adfs_f_setpos,
.getnext = adfs_f_getnext,
diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c
index 82d14cdf7..c92cfb638 100644
--- a/fs/adfs/dir_fplus.c
+++ b/fs/adfs/dir_fplus.c
@@ -256,7 +256,7 @@ adfs_fplus_free(struct adfs_dir *dir)
dir->sb = NULL;
}
-struct adfs_dir_ops adfs_fplus_dir_ops = {
+const struct adfs_dir_ops adfs_fplus_dir_ops = {
.read = adfs_fplus_read,
.setpos = adfs_fplus_setpos,
.getnext = adfs_fplus_getnext,
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 4d4a0df83..c9fdfb112 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -271,7 +271,7 @@ static int __init init_inodecache(void)
adfs_inode_cachep = kmem_cache_create("adfs_inode_cache",
sizeof(struct adfs_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
if (adfs_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index c69a87eaf..cc2b2efc9 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -138,7 +138,7 @@ extern int affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh);
extern int affs_remove_header(struct dentry *dentry);
extern u32 affs_checksum_block(struct super_block *sb, struct buffer_head *bh);
extern void affs_fix_checksum(struct super_block *sb, struct buffer_head *bh);
-extern void secs_to_datestamp(time_t secs, struct affs_date *ds);
+extern void secs_to_datestamp(time64_t secs, struct affs_date *ds);
extern umode_t prot_to_mode(u32 prot);
extern void mode_to_prot(struct inode *inode);
__printf(3, 4)
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 5fa92bc79..d6c7a51c9 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -8,6 +8,7 @@
* Please send bug reports to: hjw@zvw.de
*/
+#include <linux/math64.h>
#include "affs.h"
/*
@@ -366,22 +367,22 @@ affs_fix_checksum(struct super_block *sb, struct buffer_head *bh)
}
void
-secs_to_datestamp(time_t secs, struct affs_date *ds)
+secs_to_datestamp(time64_t secs, struct affs_date *ds)
{
u32 days;
u32 minute;
+ s32 rem;
secs -= sys_tz.tz_minuteswest * 60 + ((8 * 365 + 2) * 24 * 60 * 60);
if (secs < 0)
secs = 0;
- days = secs / 86400;
- secs -= days * 86400;
- minute = secs / 60;
- secs -= minute * 60;
+ days = div_s64_rem(secs, 86400, &rem);
+ minute = rem / 60;
+ rem -= minute * 60;
ds->days = cpu_to_be32(days);
ds->mins = cpu_to_be32(minute);
- ds->ticks = cpu_to_be32(secs * 50);
+ ds->ticks = cpu_to_be32(rem * 50);
}
umode_t
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 659c579c4..22fc7c802 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -33,11 +33,11 @@ affs_file_release(struct inode *inode, struct file *filp)
inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
if (atomic_dec_and_test(&AFFS_I(inode)->i_opencnt)) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (inode->i_size != AFFS_I(inode)->mmu_private)
affs_truncate(inode);
affs_free_prealloc(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return 0;
@@ -511,8 +511,6 @@ affs_do_readpage_ofs(struct page *page, unsigned to)
pr_debug("%s(%lu, %ld, 0, %d)\n", __func__, inode->i_ino,
page->index, to);
BUG_ON(to > PAGE_CACHE_SIZE);
- kmap(page);
- data = page_address(page);
bsize = AFFS_SB(sb)->s_data_blksize;
tmp = page->index << PAGE_CACHE_SHIFT;
bidx = tmp / bsize;
@@ -524,14 +522,15 @@ affs_do_readpage_ofs(struct page *page, unsigned to)
return PTR_ERR(bh);
tmp = min(bsize - boff, to - pos);
BUG_ON(pos + tmp > to || tmp > bsize);
+ data = kmap_atomic(page);
memcpy(data + pos, AFFS_DATA(bh) + boff, tmp);
+ kunmap_atomic(data);
affs_brelse(bh);
bidx++;
pos += tmp;
boff = 0;
}
flush_dcache_page(page);
- kunmap(page);
return 0;
}
@@ -958,12 +957,12 @@ int affs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
if (err)
return err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = write_inode_now(inode, 0);
err = sync_blockdev(inode->i_sb->s_bdev);
if (!ret)
ret = err;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
const struct file_operations affs_file_operations = {
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 173495005..0fdb0f5b2 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -140,6 +140,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
break;
case ST_SOFTLINK:
inode->i_mode |= S_IFLNK;
+ inode_nohighmem(inode);
inode->i_op = &affs_symlink_inode_operations;
inode->i_data.a_ops = &affs_symlink_aops;
break;
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 181e05b46..00d3002a6 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -344,6 +344,7 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
return -ENOSPC;
inode->i_op = &affs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_data.a_ops = &affs_symlink_aops;
inode->i_mode = S_IFLNK | 0777;
mode_to_prot(inode);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 5b50c4ca4..2a6713b6b 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -32,7 +32,7 @@ affs_commit_super(struct super_block *sb, int wait)
struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh);
lock_buffer(bh);
- secs_to_datestamp(get_seconds(), &tail->disk_change);
+ secs_to_datestamp(ktime_get_real_seconds(), &tail->disk_change);
affs_fix_checksum(sb, bh);
unlock_buffer(bh);
@@ -132,7 +132,7 @@ static int __init init_inodecache(void)
affs_inode_cachep = kmem_cache_create("affs_inode_cache",
sizeof(struct affs_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
if (affs_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c
index ea5b69a18..69b03dbb7 100644
--- a/fs/affs/symlink.c
+++ b/fs/affs/symlink.c
@@ -14,13 +14,13 @@ static int affs_symlink_readpage(struct file *file, struct page *page)
{
struct buffer_head *bh;
struct inode *inode = page->mapping->host;
- char *link = kmap(page);
+ char *link = page_address(page);
struct slink_front *lf;
int i, j;
char c;
char lc;
- pr_debug("follow_link(ino=%lu)\n", inode->i_ino);
+ pr_debug("get_link(ino=%lu)\n", inode->i_ino);
bh = affs_bread(inode->i_sb, inode->i_ino);
if (!bh)
@@ -57,12 +57,10 @@ static int affs_symlink_readpage(struct file *file, struct page *page)
link[i] = '\0';
affs_brelse(bh);
SetPageUptodate(page);
- kunmap(page);
unlock_page(page);
return 0;
fail:
SetPageError(page);
- kunmap(page);
unlock_page(page);
return -EIO;
}
@@ -73,7 +71,6 @@ const struct address_space_operations affs_symlink_aops = {
const struct inode_operations affs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.setattr = affs_notify_change,
};
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 4baf1d2b3..d91a9c9cf 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -483,7 +483,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
fl->fl_type = F_UNLCK;
- mutex_lock(&vnode->vfs_inode.i_mutex);
+ inode_lock(&vnode->vfs_inode);
/* check local lock records first */
ret = 0;
@@ -505,7 +505,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
}
error:
- mutex_unlock(&vnode->vfs_inode.i_mutex);
+ inode_unlock(&vnode->vfs_inode);
_leave(" = %d [%hd]", ret, fl->fl_type);
return ret;
}
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index e06f5a233..86cc7264c 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -56,6 +56,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
case AFS_FTYPE_SYMLINK:
inode->i_mode = S_IFLNK | vnode->status.mode;
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
break;
default:
printk("kAFS: AFS vnode with undefined type\n");
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 24a905b07..2853b4095 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -230,14 +230,9 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf,
if (size <= 1 || size >= PAGE_SIZE)
return -EINVAL;
- kbuf = kmalloc(size + 1, GFP_KERNEL);
- if (!kbuf)
- return -ENOMEM;
-
- ret = -EFAULT;
- if (copy_from_user(kbuf, buf, size) != 0)
- goto done;
- kbuf[size] = 0;
+ kbuf = memdup_user_nul(buf, size);
+ if (IS_ERR(kbuf))
+ return PTR_ERR(kbuf);
/* trim to first NL */
name = memchr(kbuf, '\n', size);
@@ -315,15 +310,9 @@ static ssize_t afs_proc_rootcell_write(struct file *file,
if (size <= 1 || size >= PAGE_SIZE)
return -EINVAL;
- ret = -ENOMEM;
- kbuf = kmalloc(size + 1, GFP_KERNEL);
- if (!kbuf)
- goto nomem;
-
- ret = -EFAULT;
- if (copy_from_user(kbuf, buf, size) != 0)
- goto infault;
- kbuf[size] = 0;
+ kbuf = memdup_user_nul(buf, size);
+ if (IS_ERR(kbuf))
+ return PTR_ERR(kbuf);
/* trim to first NL */
s = memchr(kbuf, '\n', size);
@@ -337,9 +326,7 @@ static ssize_t afs_proc_rootcell_write(struct file *file,
if (ret >= 0)
ret = size; /* consume everything, always */
-infault:
kfree(kbuf);
-nomem:
_leave(" = %d", ret);
return ret;
}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 1fb4a5129..81afefe7d 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -91,7 +91,7 @@ int __init afs_fs_init(void)
afs_inode_cachep = kmem_cache_create("afs_inode_cache",
sizeof(struct afs_vnode),
0,
- SLAB_HWCACHE_ALIGN,
+ SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT,
afs_i_init_once);
if (!afs_inode_cachep) {
printk(KERN_NOTICE "kAFS: Failed to allocate inode cache\n");
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 0714abcd7..dfef94f70 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -693,7 +693,7 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* use a writeback record as a marker in the queue - when this reaches
* the front of the queue, all the outstanding writes are either
@@ -735,7 +735,7 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
afs_put_writeback(wb);
_leave(" = %d", ret);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
diff --git a/fs/attr.c b/fs/attr.c
index 6530ced19..25b24d0f6 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -195,7 +195,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
struct timespec now;
unsigned int ia_valid = attr->ia_valid;
- WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
+ WARN_ON_ONCE(!inode_is_locked(inode));
if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
diff --git a/fs/aufs/Kconfig b/fs/aufs/Kconfig
deleted file mode 100644
index 63560ceda..000000000
--- a/fs/aufs/Kconfig
+++ /dev/null
@@ -1,185 +0,0 @@
-config AUFS_FS
- tristate "Aufs (Advanced multi layered unification filesystem) support"
- help
- Aufs is a stackable unification filesystem such as Unionfs,
- which unifies several directories and provides a merged single
- directory.
- In the early days, aufs was entirely re-designed and
- re-implemented Unionfs Version 1.x series. Introducing many
- original ideas, approaches and improvements, it becomes totally
- different from Unionfs while keeping the basic features.
-
-if AUFS_FS
-choice
- prompt "Maximum number of branches"
- default AUFS_BRANCH_MAX_127
- help
- Specifies the maximum number of branches (or member directories)
- in a single aufs. The larger value consumes more system
- resources and has a minor impact to performance.
-config AUFS_BRANCH_MAX_127
- bool "127"
- help
- Specifies the maximum number of branches (or member directories)
- in a single aufs. The larger value consumes more system
- resources and has a minor impact to performance.
-config AUFS_BRANCH_MAX_511
- bool "511"
- help
- Specifies the maximum number of branches (or member directories)
- in a single aufs. The larger value consumes more system
- resources and has a minor impact to performance.
-config AUFS_BRANCH_MAX_1023
- bool "1023"
- help
- Specifies the maximum number of branches (or member directories)
- in a single aufs. The larger value consumes more system
- resources and has a minor impact to performance.
-config AUFS_BRANCH_MAX_32767
- bool "32767"
- help
- Specifies the maximum number of branches (or member directories)
- in a single aufs. The larger value consumes more system
- resources and has a minor impact to performance.
-endchoice
-
-config AUFS_SBILIST
- bool
- depends on AUFS_MAGIC_SYSRQ || PROC_FS
- default y
- help
- Automatic configuration for internal use.
- When aufs supports Magic SysRq or /proc, enabled automatically.
-
-config AUFS_HNOTIFY
- bool "Detect direct branch access (bypassing aufs)"
- help
- If you want to modify files on branches directly, eg. bypassing aufs,
- and want aufs to detect the changes of them fully, then enable this
- option and use 'udba=notify' mount option.
- Currently there is only one available configuration, "fsnotify".
- It will have a negative impact to the performance.
- See detail in aufs.5.
-
-choice
- prompt "method" if AUFS_HNOTIFY
- default AUFS_HFSNOTIFY
-config AUFS_HFSNOTIFY
- bool "fsnotify"
- select FSNOTIFY
-endchoice
-
-config AUFS_EXPORT
- bool "NFS-exportable aufs"
- depends on EXPORTFS
- help
- If you want to export your mounted aufs via NFS, then enable this
- option. There are several requirements for this configuration.
- See detail in aufs.5.
-
-config AUFS_INO_T_64
- bool
- depends on AUFS_EXPORT
- depends on 64BIT && !(ALPHA || S390)
- default y
- help
- Automatic configuration for internal use.
- /* typedef unsigned long/int __kernel_ino_t */
- /* alpha and s390x are int */
-
-config AUFS_XATTR
- bool "support for XATTR/EA (including Security Labels)"
- help
- If your branch fs supports XATTR/EA and you want to make them
- available in aufs too, then enable this opsion and specify the
- branch attributes for EA.
- See detail in aufs.5.
-
-config AUFS_FHSM
- bool "File-based Hierarchical Storage Management"
- help
- Hierarchical Storage Management (or HSM) is a well-known feature
- in the storage world. Aufs provides this feature as file-based.
- with multiple branches.
- These multiple branches are prioritized, ie. the topmost one
- should be the fastest drive and be used heavily.
-
-config AUFS_RDU
- bool "Readdir in userspace"
- help
- Aufs has two methods to provide a merged view for a directory,
- by a user-space library and by kernel-space natively. The latter
- is always enabled but sometimes large and slow.
- If you enable this option, install the library in aufs2-util
- package, and set some environment variables for your readdir(3),
- then the work will be handled in user-space which generally
- shows better performance in most cases.
- See detail in aufs.5.
-
-config AUFS_SHWH
- bool "Show whiteouts"
- help
- If you want to make the whiteouts in aufs visible, then enable
- this option and specify 'shwh' mount option. Although it may
- sounds like philosophy or something, but in technically it
- simply shows the name of whiteout with keeping its behaviour.
-
-config AUFS_BR_RAMFS
- bool "Ramfs (initramfs/rootfs) as an aufs branch"
- help
- If you want to use ramfs as an aufs branch fs, then enable this
- option. Generally tmpfs is recommended.
- Aufs prohibited them to be a branch fs by default, because
- initramfs becomes unusable after switch_root or something
- generally. If you sets initramfs as an aufs branch and boot your
- system by switch_root, you will meet a problem easily since the
- files in initramfs may be inaccessible.
- Unless you are going to use ramfs as an aufs branch fs without
- switch_root or something, leave it N.
-
-config AUFS_BR_FUSE
- bool "Fuse fs as an aufs branch"
- depends on FUSE_FS
- select AUFS_POLL
- help
- If you want to use fuse-based userspace filesystem as an aufs
- branch fs, then enable this option.
- It implements the internal poll(2) operation which is
- implemented by fuse only (curretnly).
-
-config AUFS_POLL
- bool
- help
- Automatic configuration for internal use.
-
-config AUFS_BR_HFSPLUS
- bool "Hfsplus as an aufs branch"
- depends on HFSPLUS_FS
- default y
- help
- If you want to use hfsplus fs as an aufs branch fs, then enable
- this option. This option introduces a small overhead at
- copying-up a file on hfsplus.
-
-config AUFS_BDEV_LOOP
- bool
- depends on BLK_DEV_LOOP
- default y
- help
- Automatic configuration for internal use.
- Convert =[ym] into =y.
-
-config AUFS_DEBUG
- bool "Debug aufs"
- help
- Enable this to compile aufs internal debug code.
- It will have a negative impact to the performance.
-
-config AUFS_MAGIC_SYSRQ
- bool
- depends on AUFS_DEBUG && MAGIC_SYSRQ
- default y
- help
- Automatic configuration for internal use.
- When aufs supports Magic SysRq, enabled automatically.
-endif
diff --git a/fs/aufs/Makefile b/fs/aufs/Makefile
deleted file mode 100644
index c7efb62b5..000000000
--- a/fs/aufs/Makefile
+++ /dev/null
@@ -1,36 +0,0 @@
-
-include ${srctree}/${src}/magic.mk
-
-# cf. include/linux/kernel.h
-# enable pr_debug
-ccflags-y += -DDEBUG
-# sparse requires the full pathname
-ccflags-y += -include ${srctree}/include/uapi/linux/aufs_type.h
-
-obj-$(CONFIG_AUFS_FS) += aufs.o
-aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \
- wkq.o vfsub.o dcsub.o \
- cpup.o whout.o wbr_policy.o \
- dinfo.o dentry.o \
- dynop.o \
- finfo.o file.o f_op.o \
- dir.o vdir.o \
- iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \
- mvdown.o ioctl.o
-
-# all are boolean
-aufs-$(CONFIG_PROC_FS) += procfs.o plink.o
-aufs-$(CONFIG_SYSFS) += sysfs.o
-aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o
-aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o
-aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o
-aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o
-aufs-$(CONFIG_AUFS_EXPORT) += export.o
-aufs-$(CONFIG_AUFS_XATTR) += xattr.o
-aufs-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
-aufs-$(CONFIG_AUFS_FHSM) += fhsm.o
-aufs-$(CONFIG_AUFS_POLL) += poll.o
-aufs-$(CONFIG_AUFS_RDU) += rdu.o
-aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o
-aufs-$(CONFIG_AUFS_DEBUG) += debug.o
-aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
diff --git a/fs/aufs/aufs.h b/fs/aufs/aufs.h
deleted file mode 100644
index 49f43b433..000000000
--- a/fs/aufs/aufs.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * all header files
- */
-
-#ifndef __AUFS_H__
-#define __AUFS_H__
-
-#ifdef __KERNEL__
-
-#define AuStub(type, name, body, ...) \
- static inline type name(__VA_ARGS__) { body; }
-
-#define AuStubVoid(name, ...) \
- AuStub(void, name, , __VA_ARGS__)
-#define AuStubInt0(name, ...) \
- AuStub(int, name, return 0, __VA_ARGS__)
-
-#include "debug.h"
-
-#include "branch.h"
-#include "cpup.h"
-#include "dcsub.h"
-#include "dbgaufs.h"
-#include "dentry.h"
-#include "dir.h"
-#include "dynop.h"
-#include "file.h"
-#include "fstype.h"
-#include "inode.h"
-#include "loop.h"
-#include "module.h"
-#include "opts.h"
-#include "rwsem.h"
-#include "spl.h"
-#include "super.h"
-#include "sysaufs.h"
-#include "vfsub.h"
-#include "whout.h"
-#include "wkq.h"
-
-#endif /* __KERNEL__ */
-#endif /* __AUFS_H__ */
diff --git a/fs/aufs/branch.c b/fs/aufs/branch.c
deleted file mode 100644
index 1ab5e1f2d..000000000
--- a/fs/aufs/branch.c
+++ /dev/null
@@ -1,1394 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * branch management
- */
-
-#include <linux/compat.h>
-#include <linux/statfs.h>
-#include "aufs.h"
-
-/*
- * free a single branch
- */
-static void au_br_do_free(struct au_branch *br)
-{
- int i;
- struct au_wbr *wbr;
- struct au_dykey **key;
-
- au_hnotify_fin_br(br);
-
- if (br->br_xino.xi_file)
- fput(br->br_xino.xi_file);
- mutex_destroy(&br->br_xino.xi_nondir_mtx);
-
- AuDebugOn(atomic_read(&br->br_count));
-
- wbr = br->br_wbr;
- if (wbr) {
- for (i = 0; i < AuBrWh_Last; i++)
- dput(wbr->wbr_wh[i]);
- AuDebugOn(atomic_read(&wbr->wbr_wh_running));
- AuRwDestroy(&wbr->wbr_wh_rwsem);
- }
-
- if (br->br_fhsm) {
- au_br_fhsm_fin(br->br_fhsm);
- kfree(br->br_fhsm);
- }
-
- key = br->br_dykey;
- for (i = 0; i < AuBrDynOp; i++, key++)
- if (*key)
- au_dy_put(*key);
- else
- break;
-
- /* recursive lock, s_umount of branch's */
- lockdep_off();
- path_put(&br->br_path);
- lockdep_on();
- kfree(wbr);
- kfree(br);
-}
-
-/*
- * frees all branches
- */
-void au_br_free(struct au_sbinfo *sbinfo)
-{
- aufs_bindex_t bmax;
- struct au_branch **br;
-
- AuRwMustWriteLock(&sbinfo->si_rwsem);
-
- bmax = sbinfo->si_bend + 1;
- br = sbinfo->si_branch;
- while (bmax--)
- au_br_do_free(*br++);
-}
-
-/*
- * find the index of a branch which is specified by @br_id.
- */
-int au_br_index(struct super_block *sb, aufs_bindex_t br_id)
-{
- aufs_bindex_t bindex, bend;
-
- bend = au_sbend(sb);
- for (bindex = 0; bindex <= bend; bindex++)
- if (au_sbr_id(sb, bindex) == br_id)
- return bindex;
- return -1;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * add a branch
- */
-
-static int test_overlap(struct super_block *sb, struct dentry *h_adding,
- struct dentry *h_root)
-{
- if (unlikely(h_adding == h_root
- || au_test_loopback_overlap(sb, h_adding)))
- return 1;
- if (h_adding->d_sb != h_root->d_sb)
- return 0;
- return au_test_subdir(h_adding, h_root)
- || au_test_subdir(h_root, h_adding);
-}
-
-/*
- * returns a newly allocated branch. @new_nbranch is a number of branches
- * after adding a branch.
- */
-static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch,
- int perm)
-{
- struct au_branch *add_branch;
- struct dentry *root;
- struct inode *inode;
- int err;
-
- err = -ENOMEM;
- root = sb->s_root;
- add_branch = kzalloc(sizeof(*add_branch), GFP_NOFS);
- if (unlikely(!add_branch))
- goto out;
-
- err = au_hnotify_init_br(add_branch, perm);
- if (unlikely(err))
- goto out_br;
-
- if (au_br_writable(perm)) {
- /* may be freed separately at changing the branch permission */
- add_branch->br_wbr = kzalloc(sizeof(*add_branch->br_wbr),
- GFP_NOFS);
- if (unlikely(!add_branch->br_wbr))
- goto out_hnotify;
- }
-
- if (au_br_fhsm(perm)) {
- err = au_fhsm_br_alloc(add_branch);
- if (unlikely(err))
- goto out_wbr;
- }
-
- err = au_sbr_realloc(au_sbi(sb), new_nbranch);
- if (!err)
- err = au_di_realloc(au_di(root), new_nbranch);
- if (!err) {
- inode = d_inode(root);
- err = au_ii_realloc(au_ii(inode), new_nbranch);
- }
- if (!err)
- return add_branch; /* success */
-
-out_wbr:
- kfree(add_branch->br_wbr);
-out_hnotify:
- au_hnotify_fin_br(add_branch);
-out_br:
- kfree(add_branch);
-out:
- return ERR_PTR(err);
-}
-
-/*
- * test if the branch permission is legal or not.
- */
-static int test_br(struct inode *inode, int brperm, char *path)
-{
- int err;
-
- err = (au_br_writable(brperm) && IS_RDONLY(inode));
- if (!err)
- goto out;
-
- err = -EINVAL;
- pr_err("write permission for readonly mount or inode, %s\n", path);
-
-out:
- return err;
-}
-
-/*
- * returns:
- * 0: success, the caller will add it
- * plus: success, it is already unified, the caller should ignore it
- * minus: error
- */
-static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
-{
- int err;
- aufs_bindex_t bend, bindex;
- struct dentry *root, *h_dentry;
- struct inode *inode, *h_inode;
-
- root = sb->s_root;
- bend = au_sbend(sb);
- if (unlikely(bend >= 0
- && au_find_dbindex(root, add->path.dentry) >= 0)) {
- err = 1;
- if (!remount) {
- err = -EINVAL;
- pr_err("%s duplicated\n", add->pathname);
- }
- goto out;
- }
-
- err = -ENOSPC; /* -E2BIG; */
- if (unlikely(AUFS_BRANCH_MAX <= add->bindex
- || AUFS_BRANCH_MAX - 1 <= bend)) {
- pr_err("number of branches exceeded %s\n", add->pathname);
- goto out;
- }
-
- err = -EDOM;
- if (unlikely(add->bindex < 0 || bend + 1 < add->bindex)) {
- pr_err("bad index %d\n", add->bindex);
- goto out;
- }
-
- inode = d_inode(add->path.dentry);
- err = -ENOENT;
- if (unlikely(!inode->i_nlink)) {
- pr_err("no existence %s\n", add->pathname);
- goto out;
- }
-
- err = -EINVAL;
- if (unlikely(inode->i_sb == sb)) {
- pr_err("%s must be outside\n", add->pathname);
- goto out;
- }
-
- if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) {
- pr_err("unsupported filesystem, %s (%s)\n",
- add->pathname, au_sbtype(inode->i_sb));
- goto out;
- }
-
- if (unlikely(inode->i_sb->s_stack_depth)) {
- pr_err("already stacked, %s (%s)\n",
- add->pathname, au_sbtype(inode->i_sb));
- goto out;
- }
-
- err = test_br(d_inode(add->path.dentry), add->perm, add->pathname);
- if (unlikely(err))
- goto out;
-
- if (bend < 0)
- return 0; /* success */
-
- err = -EINVAL;
- for (bindex = 0; bindex <= bend; bindex++)
- if (unlikely(test_overlap(sb, add->path.dentry,
- au_h_dptr(root, bindex)))) {
- pr_err("%s is overlapped\n", add->pathname);
- goto out;
- }
-
- err = 0;
- if (au_opt_test(au_mntflags(sb), WARN_PERM)) {
- h_dentry = au_h_dptr(root, 0);
- h_inode = d_inode(h_dentry);
- if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO)
- || !uid_eq(h_inode->i_uid, inode->i_uid)
- || !gid_eq(h_inode->i_gid, inode->i_gid))
- pr_warn("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n",
- add->pathname,
- i_uid_read(inode), i_gid_read(inode),
- (inode->i_mode & S_IALLUGO),
- i_uid_read(h_inode), i_gid_read(h_inode),
- (h_inode->i_mode & S_IALLUGO));
- }
-
-out:
- return err;
-}
-
-/*
- * initialize or clean the whiteouts for an adding branch
- */
-static int au_br_init_wh(struct super_block *sb, struct au_branch *br,
- int new_perm)
-{
- int err, old_perm;
- aufs_bindex_t bindex;
- struct mutex *h_mtx;
- struct au_wbr *wbr;
- struct au_hinode *hdir;
- struct dentry *h_dentry;
-
- err = vfsub_mnt_want_write(au_br_mnt(br));
- if (unlikely(err))
- goto out;
-
- wbr = br->br_wbr;
- old_perm = br->br_perm;
- br->br_perm = new_perm;
- hdir = NULL;
- h_mtx = NULL;
- bindex = au_br_index(sb, br->br_id);
- if (0 <= bindex) {
- hdir = au_hi(d_inode(sb->s_root), bindex);
- au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
- } else {
- h_dentry = au_br_dentry(br);
- h_mtx = &d_inode(h_dentry)->i_mutex;
- mutex_lock_nested(h_mtx, AuLsc_I_PARENT);
- }
- if (!wbr)
- err = au_wh_init(br, sb);
- else {
- wbr_wh_write_lock(wbr);
- err = au_wh_init(br, sb);
- wbr_wh_write_unlock(wbr);
- }
- if (hdir)
- au_hn_imtx_unlock(hdir);
- else
- mutex_unlock(h_mtx);
- vfsub_mnt_drop_write(au_br_mnt(br));
- br->br_perm = old_perm;
-
- if (!err && wbr && !au_br_writable(new_perm)) {
- kfree(wbr);
- br->br_wbr = NULL;
- }
-
-out:
- return err;
-}
-
-static int au_wbr_init(struct au_branch *br, struct super_block *sb,
- int perm)
-{
- int err;
- struct kstatfs kst;
- struct au_wbr *wbr;
-
- wbr = br->br_wbr;
- au_rw_init(&wbr->wbr_wh_rwsem);
- atomic_set(&wbr->wbr_wh_running, 0);
-
- /*
- * a limit for rmdir/rename a dir
- * cf. AUFS_MAX_NAMELEN in include/uapi/linux/aufs_type.h
- */
- err = vfs_statfs(&br->br_path, &kst);
- if (unlikely(err))
- goto out;
- err = -EINVAL;
- if (kst.f_namelen >= NAME_MAX)
- err = au_br_init_wh(sb, br, perm);
- else
- pr_err("%pd(%s), unsupported namelen %ld\n",
- au_br_dentry(br),
- au_sbtype(au_br_dentry(br)->d_sb), kst.f_namelen);
-
-out:
- return err;
-}
-
-/* initialize a new branch */
-static int au_br_init(struct au_branch *br, struct super_block *sb,
- struct au_opt_add *add)
-{
- int err;
- struct inode *h_inode;
-
- err = 0;
- mutex_init(&br->br_xino.xi_nondir_mtx);
- br->br_perm = add->perm;
- br->br_path = add->path; /* set first, path_get() later */
- spin_lock_init(&br->br_dykey_lock);
- atomic_set(&br->br_count, 0);
- atomic_set(&br->br_xino_running, 0);
- br->br_id = au_new_br_id(sb);
- AuDebugOn(br->br_id < 0);
-
- if (au_br_writable(add->perm)) {
- err = au_wbr_init(br, sb, add->perm);
- if (unlikely(err))
- goto out_err;
- }
-
- if (au_opt_test(au_mntflags(sb), XINO)) {
- h_inode = d_inode(add->path.dentry);
- err = au_xino_br(sb, br, h_inode->i_ino,
- au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1);
- if (unlikely(err)) {
- AuDebugOn(br->br_xino.xi_file);
- goto out_err;
- }
- }
-
- sysaufs_br_init(br);
- path_get(&br->br_path);
- goto out; /* success */
-
-out_err:
- memset(&br->br_path, 0, sizeof(br->br_path));
-out:
- return err;
-}
-
-static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
- struct au_branch *br, aufs_bindex_t bend,
- aufs_bindex_t amount)
-{
- struct au_branch **brp;
-
- AuRwMustWriteLock(&sbinfo->si_rwsem);
-
- brp = sbinfo->si_branch + bindex;
- memmove(brp + 1, brp, sizeof(*brp) * amount);
- *brp = br;
- sbinfo->si_bend++;
- if (unlikely(bend < 0))
- sbinfo->si_bend = 0;
-}
-
-static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
- aufs_bindex_t bend, aufs_bindex_t amount)
-{
- struct au_hdentry *hdp;
-
- AuRwMustWriteLock(&dinfo->di_rwsem);
-
- hdp = dinfo->di_hdentry + bindex;
- memmove(hdp + 1, hdp, sizeof(*hdp) * amount);
- au_h_dentry_init(hdp);
- dinfo->di_bend++;
- if (unlikely(bend < 0))
- dinfo->di_bstart = 0;
-}
-
-static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex,
- aufs_bindex_t bend, aufs_bindex_t amount)
-{
- struct au_hinode *hip;
-
- AuRwMustWriteLock(&iinfo->ii_rwsem);
-
- hip = iinfo->ii_hinode + bindex;
- memmove(hip + 1, hip, sizeof(*hip) * amount);
- hip->hi_inode = NULL;
- au_hn_init(hip);
- iinfo->ii_bend++;
- if (unlikely(bend < 0))
- iinfo->ii_bstart = 0;
-}
-
-static void au_br_do_add(struct super_block *sb, struct au_branch *br,
- aufs_bindex_t bindex)
-{
- struct dentry *root, *h_dentry;
- struct inode *root_inode, *h_inode;
- aufs_bindex_t bend, amount;
-
- root = sb->s_root;
- root_inode = d_inode(root);
- bend = au_sbend(sb);
- amount = bend + 1 - bindex;
- h_dentry = au_br_dentry(br);
- au_sbilist_lock();
- au_br_do_add_brp(au_sbi(sb), bindex, br, bend, amount);
- au_br_do_add_hdp(au_di(root), bindex, bend, amount);
- au_br_do_add_hip(au_ii(root_inode), bindex, bend, amount);
- au_set_h_dptr(root, bindex, dget(h_dentry));
- h_inode = d_inode(h_dentry);
- au_set_h_iptr(root_inode, bindex, au_igrab(h_inode), /*flags*/0);
- au_sbilist_unlock();
-}
-
-int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
-{
- int err;
- aufs_bindex_t bend, add_bindex;
- struct dentry *root, *h_dentry;
- struct inode *root_inode;
- struct au_branch *add_branch;
-
- root = sb->s_root;
- root_inode = d_inode(root);
- IMustLock(root_inode);
- err = test_add(sb, add, remount);
- if (unlikely(err < 0))
- goto out;
- if (err) {
- err = 0;
- goto out; /* success */
- }
-
- bend = au_sbend(sb);
- add_branch = au_br_alloc(sb, bend + 2, add->perm);
- err = PTR_ERR(add_branch);
- if (IS_ERR(add_branch))
- goto out;
-
- err = au_br_init(add_branch, sb, add);
- if (unlikely(err)) {
- au_br_do_free(add_branch);
- goto out;
- }
-
- add_bindex = add->bindex;
- if (!remount)
- au_br_do_add(sb, add_branch, add_bindex);
- else {
- sysaufs_brs_del(sb, add_bindex);
- au_br_do_add(sb, add_branch, add_bindex);
- sysaufs_brs_add(sb, add_bindex);
- }
-
- h_dentry = add->path.dentry;
- if (!add_bindex) {
- au_cpup_attr_all(root_inode, /*force*/1);
- sb->s_maxbytes = h_dentry->d_sb->s_maxbytes;
- } else
- au_add_nlink(root_inode, d_inode(h_dentry));
-
- /*
- * this test/set prevents aufs from handling unnecesary notify events
- * of xino files, in case of re-adding a writable branch which was
- * once detached from aufs.
- */
- if (au_xino_brid(sb) < 0
- && au_br_writable(add_branch->br_perm)
- && !au_test_fs_bad_xino(h_dentry->d_sb)
- && add_branch->br_xino.xi_file
- && add_branch->br_xino.xi_file->f_path.dentry->d_parent == h_dentry)
- au_xino_brid_set(sb, add_branch->br_id);
-
-out:
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static unsigned long long au_farray_cb(struct super_block *sb, void *a,
- unsigned long long max __maybe_unused,
- void *arg)
-{
- unsigned long long n;
- struct file **p, *f;
- struct au_sphlhead *files;
- struct au_finfo *finfo;
-
- n = 0;
- p = a;
- files = &au_sbi(sb)->si_files;
- spin_lock(&files->spin);
- hlist_for_each_entry(finfo, &files->head, fi_hlist) {
- f = finfo->fi_file;
- if (file_count(f)
- && !special_file(file_inode(f)->i_mode)) {
- get_file(f);
- *p++ = f;
- n++;
- AuDebugOn(n > max);
- }
- }
- spin_unlock(&files->spin);
-
- return n;
-}
-
-static struct file **au_farray_alloc(struct super_block *sb,
- unsigned long long *max)
-{
- *max = atomic_long_read(&au_sbi(sb)->si_nfiles);
- return au_array_alloc(max, au_farray_cb, sb, /*arg*/NULL);
-}
-
-static void au_farray_free(struct file **a, unsigned long long max)
-{
- unsigned long long ull;
-
- for (ull = 0; ull < max; ull++)
- if (a[ull])
- fput(a[ull]);
- kvfree(a);
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * delete a branch
- */
-
-/* to show the line number, do not make it inlined function */
-#define AuVerbose(do_info, fmt, ...) do { \
- if (do_info) \
- pr_info(fmt, ##__VA_ARGS__); \
-} while (0)
-
-static int au_test_ibusy(struct inode *inode, aufs_bindex_t bstart,
- aufs_bindex_t bend)
-{
- return (inode && !S_ISDIR(inode->i_mode)) || bstart == bend;
-}
-
-static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t bstart,
- aufs_bindex_t bend)
-{
- return au_test_ibusy(d_inode(dentry), bstart, bend);
-}
-
-/*
- * test if the branch is deletable or not.
- */
-static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
- unsigned int sigen, const unsigned int verbose)
-{
- int err, i, j, ndentry;
- aufs_bindex_t bstart, bend;
- struct au_dcsub_pages dpages;
- struct au_dpage *dpage;
- struct dentry *d;
-
- err = au_dpages_init(&dpages, GFP_NOFS);
- if (unlikely(err))
- goto out;
- err = au_dcsub_pages(&dpages, root, NULL, NULL);
- if (unlikely(err))
- goto out_dpages;
-
- for (i = 0; !err && i < dpages.ndpage; i++) {
- dpage = dpages.dpages + i;
- ndentry = dpage->ndentry;
- for (j = 0; !err && j < ndentry; j++) {
- d = dpage->dentries[j];
- AuDebugOn(au_dcount(d) <= 0);
- if (!au_digen_test(d, sigen)) {
- di_read_lock_child(d, AuLock_IR);
- if (unlikely(au_dbrange_test(d))) {
- di_read_unlock(d, AuLock_IR);
- continue;
- }
- } else {
- di_write_lock_child(d);
- if (unlikely(au_dbrange_test(d))) {
- di_write_unlock(d);
- continue;
- }
- err = au_reval_dpath(d, sigen);
- if (!err)
- di_downgrade_lock(d, AuLock_IR);
- else {
- di_write_unlock(d);
- break;
- }
- }
-
- /* AuDbgDentry(d); */
- bstart = au_dbstart(d);
- bend = au_dbend(d);
- if (bstart <= bindex
- && bindex <= bend
- && au_h_dptr(d, bindex)
- && au_test_dbusy(d, bstart, bend)) {
- err = -EBUSY;
- AuVerbose(verbose, "busy %pd\n", d);
- AuDbgDentry(d);
- }
- di_read_unlock(d, AuLock_IR);
- }
- }
-
-out_dpages:
- au_dpages_free(&dpages);
-out:
- return err;
-}
-
-static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
- unsigned int sigen, const unsigned int verbose)
-{
- int err;
- unsigned long long max, ull;
- struct inode *i, **array;
- aufs_bindex_t bstart, bend;
-
- array = au_iarray_alloc(sb, &max);
- err = PTR_ERR(array);
- if (IS_ERR(array))
- goto out;
-
- err = 0;
- AuDbg("b%d\n", bindex);
- for (ull = 0; !err && ull < max; ull++) {
- i = array[ull];
- if (unlikely(!i))
- break;
- if (i->i_ino == AUFS_ROOT_INO)
- continue;
-
- /* AuDbgInode(i); */
- if (au_iigen(i, NULL) == sigen)
- ii_read_lock_child(i);
- else {
- ii_write_lock_child(i);
- err = au_refresh_hinode_self(i);
- au_iigen_dec(i);
- if (!err)
- ii_downgrade_lock(i);
- else {
- ii_write_unlock(i);
- break;
- }
- }
-
- bstart = au_ibstart(i);
- bend = au_ibend(i);
- if (bstart <= bindex
- && bindex <= bend
- && au_h_iptr(i, bindex)
- && au_test_ibusy(i, bstart, bend)) {
- err = -EBUSY;
- AuVerbose(verbose, "busy i%lu\n", i->i_ino);
- AuDbgInode(i);
- }
- ii_read_unlock(i);
- }
- au_iarray_free(array, max);
-
-out:
- return err;
-}
-
-static int test_children_busy(struct dentry *root, aufs_bindex_t bindex,
- const unsigned int verbose)
-{
- int err;
- unsigned int sigen;
-
- sigen = au_sigen(root->d_sb);
- DiMustNoWaiters(root);
- IiMustNoWaiters(d_inode(root));
- di_write_unlock(root);
- err = test_dentry_busy(root, bindex, sigen, verbose);
- if (!err)
- err = test_inode_busy(root->d_sb, bindex, sigen, verbose);
- di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */
-
- return err;
-}
-
-static int test_dir_busy(struct file *file, aufs_bindex_t br_id,
- struct file **to_free, int *idx)
-{
- int err;
- unsigned char matched, root;
- aufs_bindex_t bindex, bend;
- struct au_fidir *fidir;
- struct au_hfile *hfile;
-
- err = 0;
- root = IS_ROOT(file->f_path.dentry);
- if (root) {
- get_file(file);
- to_free[*idx] = file;
- (*idx)++;
- goto out;
- }
-
- matched = 0;
- fidir = au_fi(file)->fi_hdir;
- AuDebugOn(!fidir);
- bend = au_fbend_dir(file);
- for (bindex = au_fbstart(file); bindex <= bend; bindex++) {
- hfile = fidir->fd_hfile + bindex;
- if (!hfile->hf_file)
- continue;
-
- if (hfile->hf_br->br_id == br_id) {
- matched = 1;
- break;
- }
- }
- if (matched)
- err = -EBUSY;
-
-out:
- return err;
-}
-
-static int test_file_busy(struct super_block *sb, aufs_bindex_t br_id,
- struct file **to_free, int opened)
-{
- int err, idx;
- unsigned long long ull, max;
- aufs_bindex_t bstart;
- struct file *file, **array;
- struct dentry *root;
- struct au_hfile *hfile;
-
- array = au_farray_alloc(sb, &max);
- err = PTR_ERR(array);
- if (IS_ERR(array))
- goto out;
-
- err = 0;
- idx = 0;
- root = sb->s_root;
- di_write_unlock(root);
- for (ull = 0; ull < max; ull++) {
- file = array[ull];
- if (unlikely(!file))
- break;
-
- /* AuDbg("%pD\n", file); */
- fi_read_lock(file);
- bstart = au_fbstart(file);
- if (!d_is_dir(file->f_path.dentry)) {
- hfile = &au_fi(file)->fi_htop;
- if (hfile->hf_br->br_id == br_id)
- err = -EBUSY;
- } else
- err = test_dir_busy(file, br_id, to_free, &idx);
- fi_read_unlock(file);
- if (unlikely(err))
- break;
- }
- di_write_lock_child(root);
- au_farray_free(array, max);
- AuDebugOn(idx > opened);
-
-out:
- return err;
-}
-
-static void br_del_file(struct file **to_free, unsigned long long opened,
- aufs_bindex_t br_id)
-{
- unsigned long long ull;
- aufs_bindex_t bindex, bstart, bend, bfound;
- struct file *file;
- struct au_fidir *fidir;
- struct au_hfile *hfile;
-
- for (ull = 0; ull < opened; ull++) {
- file = to_free[ull];
- if (unlikely(!file))
- break;
-
- /* AuDbg("%pD\n", file); */
- AuDebugOn(!d_is_dir(file->f_path.dentry));
- bfound = -1;
- fidir = au_fi(file)->fi_hdir;
- AuDebugOn(!fidir);
- fi_write_lock(file);
- bstart = au_fbstart(file);
- bend = au_fbend_dir(file);
- for (bindex = bstart; bindex <= bend; bindex++) {
- hfile = fidir->fd_hfile + bindex;
- if (!hfile->hf_file)
- continue;
-
- if (hfile->hf_br->br_id == br_id) {
- bfound = bindex;
- break;
- }
- }
- AuDebugOn(bfound < 0);
- au_set_h_fptr(file, bfound, NULL);
- if (bfound == bstart) {
- for (bstart++; bstart <= bend; bstart++)
- if (au_hf_dir(file, bstart)) {
- au_set_fbstart(file, bstart);
- break;
- }
- }
- fi_write_unlock(file);
- }
-}
-
-static void au_br_do_del_brp(struct au_sbinfo *sbinfo,
- const aufs_bindex_t bindex,
- const aufs_bindex_t bend)
-{
- struct au_branch **brp, **p;
-
- AuRwMustWriteLock(&sbinfo->si_rwsem);
-
- brp = sbinfo->si_branch + bindex;
- if (bindex < bend)
- memmove(brp, brp + 1, sizeof(*brp) * (bend - bindex));
- sbinfo->si_branch[0 + bend] = NULL;
- sbinfo->si_bend--;
-
- p = krealloc(sbinfo->si_branch, sizeof(*p) * bend, AuGFP_SBILIST);
- if (p)
- sbinfo->si_branch = p;
- /* harmless error */
-}
-
-static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex,
- const aufs_bindex_t bend)
-{
- struct au_hdentry *hdp, *p;
-
- AuRwMustWriteLock(&dinfo->di_rwsem);
-
- hdp = dinfo->di_hdentry;
- if (bindex < bend)
- memmove(hdp + bindex, hdp + bindex + 1,
- sizeof(*hdp) * (bend - bindex));
- hdp[0 + bend].hd_dentry = NULL;
- dinfo->di_bend--;
-
- p = krealloc(hdp, sizeof(*p) * bend, AuGFP_SBILIST);
- if (p)
- dinfo->di_hdentry = p;
- /* harmless error */
-}
-
-static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
- const aufs_bindex_t bend)
-{
- struct au_hinode *hip, *p;
-
- AuRwMustWriteLock(&iinfo->ii_rwsem);
-
- hip = iinfo->ii_hinode + bindex;
- if (bindex < bend)
- memmove(hip, hip + 1, sizeof(*hip) * (bend - bindex));
- iinfo->ii_hinode[0 + bend].hi_inode = NULL;
- au_hn_init(iinfo->ii_hinode + bend);
- iinfo->ii_bend--;
-
- p = krealloc(iinfo->ii_hinode, sizeof(*p) * bend, AuGFP_SBILIST);
- if (p)
- iinfo->ii_hinode = p;
- /* harmless error */
-}
-
-static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
- struct au_branch *br)
-{
- aufs_bindex_t bend;
- struct au_sbinfo *sbinfo;
- struct dentry *root, *h_root;
- struct inode *inode, *h_inode;
- struct au_hinode *hinode;
-
- SiMustWriteLock(sb);
-
- root = sb->s_root;
- inode = d_inode(root);
- sbinfo = au_sbi(sb);
- bend = sbinfo->si_bend;
-
- h_root = au_h_dptr(root, bindex);
- hinode = au_hi(inode, bindex);
- h_inode = au_igrab(hinode->hi_inode);
- au_hiput(hinode);
-
- au_sbilist_lock();
- au_br_do_del_brp(sbinfo, bindex, bend);
- au_br_do_del_hdp(au_di(root), bindex, bend);
- au_br_do_del_hip(au_ii(inode), bindex, bend);
- au_sbilist_unlock();
-
- dput(h_root);
- iput(h_inode);
- au_br_do_free(br);
-}
-
-static unsigned long long empty_cb(struct super_block *sb, void *array,
- unsigned long long max, void *arg)
-{
- return max;
-}
-
-int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
-{
- int err, rerr, i;
- unsigned long long opened;
- unsigned int mnt_flags;
- aufs_bindex_t bindex, bend, br_id;
- unsigned char do_wh, verbose;
- struct au_branch *br;
- struct au_wbr *wbr;
- struct dentry *root;
- struct file **to_free;
-
- err = 0;
- opened = 0;
- to_free = NULL;
- root = sb->s_root;
- bindex = au_find_dbindex(root, del->h_path.dentry);
- if (bindex < 0) {
- if (remount)
- goto out; /* success */
- err = -ENOENT;
- pr_err("%s no such branch\n", del->pathname);
- goto out;
- }
- AuDbg("bindex b%d\n", bindex);
-
- err = -EBUSY;
- mnt_flags = au_mntflags(sb);
- verbose = !!au_opt_test(mnt_flags, VERBOSE);
- bend = au_sbend(sb);
- if (unlikely(!bend)) {
- AuVerbose(verbose, "no more branches left\n");
- goto out;
- }
- br = au_sbr(sb, bindex);
- AuDebugOn(!path_equal(&br->br_path, &del->h_path));
-
- br_id = br->br_id;
- opened = atomic_read(&br->br_count);
- if (unlikely(opened)) {
- to_free = au_array_alloc(&opened, empty_cb, sb, NULL);
- err = PTR_ERR(to_free);
- if (IS_ERR(to_free))
- goto out;
-
- err = test_file_busy(sb, br_id, to_free, opened);
- if (unlikely(err)) {
- AuVerbose(verbose, "%llu file(s) opened\n", opened);
- goto out;
- }
- }
-
- wbr = br->br_wbr;
- do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph);
- if (do_wh) {
- /* instead of WbrWhMustWriteLock(wbr) */
- SiMustWriteLock(sb);
- for (i = 0; i < AuBrWh_Last; i++) {
- dput(wbr->wbr_wh[i]);
- wbr->wbr_wh[i] = NULL;
- }
- }
-
- err = test_children_busy(root, bindex, verbose);
- if (unlikely(err)) {
- if (do_wh)
- goto out_wh;
- goto out;
- }
-
- err = 0;
- if (to_free) {
- /*
- * now we confirmed the branch is deletable.
- * let's free the remaining opened dirs on the branch.
- */
- di_write_unlock(root);
- br_del_file(to_free, opened, br_id);
- di_write_lock_child(root);
- }
-
- if (!remount)
- au_br_do_del(sb, bindex, br);
- else {
- sysaufs_brs_del(sb, bindex);
- au_br_do_del(sb, bindex, br);
- sysaufs_brs_add(sb, bindex);
- }
-
- if (!bindex) {
- au_cpup_attr_all(d_inode(root), /*force*/1);
- sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes;
- } else
- au_sub_nlink(d_inode(root), d_inode(del->h_path.dentry));
- if (au_opt_test(mnt_flags, PLINK))
- au_plink_half_refresh(sb, br_id);
-
- if (au_xino_brid(sb) == br_id)
- au_xino_brid_set(sb, -1);
- goto out; /* success */
-
-out_wh:
- /* revert */
- rerr = au_br_init_wh(sb, br, br->br_perm);
- if (rerr)
- pr_warn("failed re-creating base whiteout, %s. (%d)\n",
- del->pathname, rerr);
-out:
- if (to_free)
- au_farray_free(to_free, opened);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg)
-{
- int err;
- aufs_bindex_t bstart, bend;
- struct aufs_ibusy ibusy;
- struct inode *inode, *h_inode;
-
- err = -EPERM;
- if (unlikely(!capable(CAP_SYS_ADMIN)))
- goto out;
-
- err = copy_from_user(&ibusy, arg, sizeof(ibusy));
- if (!err)
- err = !access_ok(VERIFY_WRITE, &arg->h_ino, sizeof(arg->h_ino));
- if (unlikely(err)) {
- err = -EFAULT;
- AuTraceErr(err);
- goto out;
- }
-
- err = -EINVAL;
- si_read_lock(sb, AuLock_FLUSH);
- if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbend(sb)))
- goto out_unlock;
-
- err = 0;
- ibusy.h_ino = 0; /* invalid */
- inode = ilookup(sb, ibusy.ino);
- if (!inode
- || inode->i_ino == AUFS_ROOT_INO
- || is_bad_inode(inode))
- goto out_unlock;
-
- ii_read_lock_child(inode);
- bstart = au_ibstart(inode);
- bend = au_ibend(inode);
- if (bstart <= ibusy.bindex && ibusy.bindex <= bend) {
- h_inode = au_h_iptr(inode, ibusy.bindex);
- if (h_inode && au_test_ibusy(inode, bstart, bend))
- ibusy.h_ino = h_inode->i_ino;
- }
- ii_read_unlock(inode);
- iput(inode);
-
-out_unlock:
- si_read_unlock(sb);
- if (!err) {
- err = __put_user(ibusy.h_ino, &arg->h_ino);
- if (unlikely(err)) {
- err = -EFAULT;
- AuTraceErr(err);
- }
- }
-out:
- return err;
-}
-
-long au_ibusy_ioctl(struct file *file, unsigned long arg)
-{
- return au_ibusy(file->f_path.dentry->d_sb, (void __user *)arg);
-}
-
-#ifdef CONFIG_COMPAT
-long au_ibusy_compat_ioctl(struct file *file, unsigned long arg)
-{
- return au_ibusy(file->f_path.dentry->d_sb, compat_ptr(arg));
-}
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * change a branch permission
- */
-
-static void au_warn_ima(void)
-{
-#ifdef CONFIG_IMA
- /* since it doesn't support mark_files_ro() */
- AuWarn1("RW -> RO makes IMA to produce wrong message\n");
-#endif
-}
-
-static int do_need_sigen_inc(int a, int b)
-{
- return au_br_whable(a) && !au_br_whable(b);
-}
-
-static int need_sigen_inc(int old, int new)
-{
- return do_need_sigen_inc(old, new)
- || do_need_sigen_inc(new, old);
-}
-
-static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex)
-{
- int err, do_warn;
- unsigned int mnt_flags;
- unsigned long long ull, max;
- aufs_bindex_t br_id;
- unsigned char verbose, writer;
- struct file *file, *hf, **array;
- struct au_hfile *hfile;
-
- mnt_flags = au_mntflags(sb);
- verbose = !!au_opt_test(mnt_flags, VERBOSE);
-
- array = au_farray_alloc(sb, &max);
- err = PTR_ERR(array);
- if (IS_ERR(array))
- goto out;
-
- do_warn = 0;
- br_id = au_sbr_id(sb, bindex);
- for (ull = 0; ull < max; ull++) {
- file = array[ull];
- if (unlikely(!file))
- break;
-
- /* AuDbg("%pD\n", file); */
- fi_read_lock(file);
- if (unlikely(au_test_mmapped(file))) {
- err = -EBUSY;
- AuVerbose(verbose, "mmapped %pD\n", file);
- AuDbgFile(file);
- FiMustNoWaiters(file);
- fi_read_unlock(file);
- goto out_array;
- }
-
- hfile = &au_fi(file)->fi_htop;
- hf = hfile->hf_file;
- if (!d_is_reg(file->f_path.dentry)
- || !(file->f_mode & FMODE_WRITE)
- || hfile->hf_br->br_id != br_id
- || !(hf->f_mode & FMODE_WRITE))
- array[ull] = NULL;
- else {
- do_warn = 1;
- get_file(file);
- }
-
- FiMustNoWaiters(file);
- fi_read_unlock(file);
- fput(file);
- }
-
- err = 0;
- if (do_warn)
- au_warn_ima();
-
- for (ull = 0; ull < max; ull++) {
- file = array[ull];
- if (!file)
- continue;
-
- /* todo: already flushed? */
- /*
- * fs/super.c:mark_files_ro() is gone, but aufs keeps its
- * approach which resets f_mode and calls mnt_drop_write() and
- * file_release_write() for each file, because the branch
- * attribute in aufs world is totally different from the native
- * fs rw/ro mode.
- */
- /* fi_read_lock(file); */
- hfile = &au_fi(file)->fi_htop;
- hf = hfile->hf_file;
- /* fi_read_unlock(file); */
- spin_lock(&hf->f_lock);
- writer = !!(hf->f_mode & FMODE_WRITER);
- hf->f_mode &= ~(FMODE_WRITE | FMODE_WRITER);
- spin_unlock(&hf->f_lock);
- if (writer) {
- put_write_access(file_inode(hf));
- __mnt_drop_write(hf->f_path.mnt);
- }
- }
-
-out_array:
- au_farray_free(array, max);
-out:
- AuTraceErr(err);
- return err;
-}
-
-int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
- int *do_refresh)
-{
- int err, rerr;
- aufs_bindex_t bindex;
- struct dentry *root;
- struct au_branch *br;
- struct au_br_fhsm *bf;
-
- root = sb->s_root;
- bindex = au_find_dbindex(root, mod->h_root);
- if (bindex < 0) {
- if (remount)
- return 0; /* success */
- err = -ENOENT;
- pr_err("%s no such branch\n", mod->path);
- goto out;
- }
- AuDbg("bindex b%d\n", bindex);
-
- err = test_br(d_inode(mod->h_root), mod->perm, mod->path);
- if (unlikely(err))
- goto out;
-
- br = au_sbr(sb, bindex);
- AuDebugOn(mod->h_root != au_br_dentry(br));
- if (br->br_perm == mod->perm)
- return 0; /* success */
-
- /* pre-allocate for non-fhsm --> fhsm */
- bf = NULL;
- if (!au_br_fhsm(br->br_perm) && au_br_fhsm(mod->perm)) {
- err = au_fhsm_br_alloc(br);
- if (unlikely(err))
- goto out;
- bf = br->br_fhsm;
- br->br_fhsm = NULL;
- }
-
- if (au_br_writable(br->br_perm)) {
- /* remove whiteout base */
- err = au_br_init_wh(sb, br, mod->perm);
- if (unlikely(err))
- goto out_bf;
-
- if (!au_br_writable(mod->perm)) {
- /* rw --> ro, file might be mmapped */
- DiMustNoWaiters(root);
- IiMustNoWaiters(d_inode(root));
- di_write_unlock(root);
- err = au_br_mod_files_ro(sb, bindex);
- /* aufs_write_lock() calls ..._child() */
- di_write_lock_child(root);
-
- if (unlikely(err)) {
- rerr = -ENOMEM;
- br->br_wbr = kzalloc(sizeof(*br->br_wbr),
- GFP_NOFS);
- if (br->br_wbr)
- rerr = au_wbr_init(br, sb, br->br_perm);
- if (unlikely(rerr)) {
- AuIOErr("nested error %d (%d)\n",
- rerr, err);
- br->br_perm = mod->perm;
- }
- }
- }
- } else if (au_br_writable(mod->perm)) {
- /* ro --> rw */
- err = -ENOMEM;
- br->br_wbr = kzalloc(sizeof(*br->br_wbr), GFP_NOFS);
- if (br->br_wbr) {
- err = au_wbr_init(br, sb, mod->perm);
- if (unlikely(err)) {
- kfree(br->br_wbr);
- br->br_wbr = NULL;
- }
- }
- }
- if (unlikely(err))
- goto out_bf;
-
- if (au_br_fhsm(br->br_perm)) {
- if (!au_br_fhsm(mod->perm)) {
- /* fhsm --> non-fhsm */
- au_br_fhsm_fin(br->br_fhsm);
- kfree(br->br_fhsm);
- br->br_fhsm = NULL;
- }
- } else if (au_br_fhsm(mod->perm))
- /* non-fhsm --> fhsm */
- br->br_fhsm = bf;
-
- *do_refresh |= need_sigen_inc(br->br_perm, mod->perm);
- br->br_perm = mod->perm;
- goto out; /* success */
-
-out_bf:
- kfree(bf);
-out:
- AuTraceErr(err);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs)
-{
- int err;
- struct kstatfs kstfs;
-
- err = vfs_statfs(&br->br_path, &kstfs);
- if (!err) {
- stfs->f_blocks = kstfs.f_blocks;
- stfs->f_bavail = kstfs.f_bavail;
- stfs->f_files = kstfs.f_files;
- stfs->f_ffree = kstfs.f_ffree;
- }
-
- return err;
-}
diff --git a/fs/aufs/branch.h b/fs/aufs/branch.h
deleted file mode 100644
index 4c52ae166..000000000
--- a/fs/aufs/branch.h
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * branch filesystems and xino for them
- */
-
-#ifndef __AUFS_BRANCH_H__
-#define __AUFS_BRANCH_H__
-
-#ifdef __KERNEL__
-
-#include <linux/mount.h>
-#include "dynop.h"
-#include "rwsem.h"
-#include "super.h"
-
-/* ---------------------------------------------------------------------- */
-
-/* a xino file */
-struct au_xino_file {
- struct file *xi_file;
- struct mutex xi_nondir_mtx;
-
- /* todo: make xino files an array to support huge inode number */
-
-#ifdef CONFIG_DEBUG_FS
- struct dentry *xi_dbgaufs;
-#endif
-};
-
-/* File-based Hierarchical Storage Management */
-struct au_br_fhsm {
-#ifdef CONFIG_AUFS_FHSM
- struct mutex bf_lock;
- unsigned long bf_jiffy;
- struct aufs_stfs bf_stfs;
- int bf_readable;
-#endif
-};
-
-/* members for writable branch only */
-enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last};
-struct au_wbr {
- struct au_rwsem wbr_wh_rwsem;
- struct dentry *wbr_wh[AuBrWh_Last];
- atomic_t wbr_wh_running;
-#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */
-#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */
-#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */
-
- /* mfs mode */
- unsigned long long wbr_bytes;
-};
-
-/* ext2 has 3 types of operations at least, ext3 has 4 */
-#define AuBrDynOp (AuDyLast * 4)
-
-#ifdef CONFIG_AUFS_HFSNOTIFY
-/* support for asynchronous destruction */
-struct au_br_hfsnotify {
- struct fsnotify_group *hfsn_group;
-};
-#endif
-
-/* sysfs entries */
-struct au_brsysfs {
- char name[16];
- struct attribute attr;
-};
-
-enum {
- AuBrSysfs_BR,
- AuBrSysfs_BRID,
- AuBrSysfs_Last
-};
-
-/* protected by superblock rwsem */
-struct au_branch {
- struct au_xino_file br_xino;
-
- aufs_bindex_t br_id;
-
- int br_perm;
- struct path br_path;
- spinlock_t br_dykey_lock;
- struct au_dykey *br_dykey[AuBrDynOp];
- atomic_t br_count;
-
- struct au_wbr *br_wbr;
- struct au_br_fhsm *br_fhsm;
-
- /* xino truncation */
- atomic_t br_xino_running;
-
-#ifdef CONFIG_AUFS_HFSNOTIFY
- struct au_br_hfsnotify *br_hfsn;
-#endif
-
-#ifdef CONFIG_SYSFS
- /* entries under sysfs per mount-point */
- struct au_brsysfs br_sysfs[AuBrSysfs_Last];
-#endif
-};
-
-/* ---------------------------------------------------------------------- */
-
-static inline struct vfsmount *au_br_mnt(struct au_branch *br)
-{
- return br->br_path.mnt;
-}
-
-static inline struct dentry *au_br_dentry(struct au_branch *br)
-{
- return br->br_path.dentry;
-}
-
-static inline struct super_block *au_br_sb(struct au_branch *br)
-{
- return au_br_mnt(br)->mnt_sb;
-}
-
-static inline int au_br_rdonly(struct au_branch *br)
-{
- return ((au_br_sb(br)->s_flags & MS_RDONLY)
- || !au_br_writable(br->br_perm))
- ? -EROFS : 0;
-}
-
-static inline int au_br_hnotifyable(int brperm __maybe_unused)
-{
-#ifdef CONFIG_AUFS_HNOTIFY
- return !(brperm & AuBrPerm_RR);
-#else
- return 0;
-#endif
-}
-
-static inline int au_br_test_oflag(int oflag, struct au_branch *br)
-{
- int err, exec_flag;
-
- err = 0;
- exec_flag = oflag & __FMODE_EXEC;
- if (unlikely(exec_flag && (au_br_mnt(br)->mnt_flags & MNT_NOEXEC)))
- err = -EACCES;
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* branch.c */
-struct au_sbinfo;
-void au_br_free(struct au_sbinfo *sinfo);
-int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
-struct au_opt_add;
-int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount);
-struct au_opt_del;
-int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount);
-long au_ibusy_ioctl(struct file *file, unsigned long arg);
-#ifdef CONFIG_COMPAT
-long au_ibusy_compat_ioctl(struct file *file, unsigned long arg);
-#endif
-struct au_opt_mod;
-int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
- int *do_refresh);
-struct aufs_stfs;
-int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs);
-
-/* xino.c */
-static const loff_t au_loff_max = LLONG_MAX;
-
-int au_xib_trunc(struct super_block *sb);
-ssize_t xino_fread(vfs_readf_t func, struct file *file, void *buf, size_t size,
- loff_t *pos);
-ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
- size_t size, loff_t *pos);
-struct file *au_xino_create2(struct file *base_file, struct file *copy_src);
-struct file *au_xino_create(struct super_block *sb, char *fname, int silent);
-ino_t au_xino_new_ino(struct super_block *sb);
-void au_xino_delete_inode(struct inode *inode, const int unlinked);
-int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
- ino_t ino);
-int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
- ino_t *ino);
-int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino,
- struct file *base_file, int do_test);
-int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex);
-
-struct au_opt_xino;
-int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount);
-void au_xino_clr(struct super_block *sb);
-struct file *au_xino_def(struct super_block *sb);
-int au_xino_path(struct seq_file *seq, struct file *file);
-
-/* ---------------------------------------------------------------------- */
-
-/* Superblock to branch */
-static inline
-aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex)
-{
- return au_sbr(sb, bindex)->br_id;
-}
-
-static inline
-struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex)
-{
- return au_br_mnt(au_sbr(sb, bindex));
-}
-
-static inline
-struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
-{
- return au_br_sb(au_sbr(sb, bindex));
-}
-
-static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex)
-{
- atomic_dec(&au_sbr(sb, bindex)->br_count);
-}
-
-static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
-{
- return au_sbr(sb, bindex)->br_perm;
-}
-
-static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex)
-{
- return au_br_whable(au_sbr_perm(sb, bindex));
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * wbr_wh_read_lock, wbr_wh_write_lock
- * wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock
- */
-AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem);
-
-#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&wbr->wbr_wh_rwsem)
-#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&wbr->wbr_wh_rwsem)
-#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&wbr->wbr_wh_rwsem)
-
-/* ---------------------------------------------------------------------- */
-
-#ifdef CONFIG_AUFS_FHSM
-static inline void au_br_fhsm_init(struct au_br_fhsm *brfhsm)
-{
- mutex_init(&brfhsm->bf_lock);
- brfhsm->bf_jiffy = 0;
- brfhsm->bf_readable = 0;
-}
-
-static inline void au_br_fhsm_fin(struct au_br_fhsm *brfhsm)
-{
- mutex_destroy(&brfhsm->bf_lock);
-}
-#else
-AuStubVoid(au_br_fhsm_init, struct au_br_fhsm *brfhsm)
-AuStubVoid(au_br_fhsm_fin, struct au_br_fhsm *brfhsm)
-#endif
-
-#endif /* __KERNEL__ */
-#endif /* __AUFS_BRANCH_H__ */
diff --git a/fs/aufs/cpup.c b/fs/aufs/cpup.c
deleted file mode 100644
index cadb3adb7..000000000
--- a/fs/aufs/cpup.c
+++ /dev/null
@@ -1,1366 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * copy-up functions, see wbr_policy.c for copy-down
- */
-
-#include <linux/fs_stack.h>
-#include <linux/mm.h>
-#include <linux/task_work.h>
-#include "aufs.h"
-
-void au_cpup_attr_flags(struct inode *dst, unsigned int iflags)
-{
- const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE
- | S_NOATIME | S_NOCMTIME | S_AUTOMOUNT;
-
- BUILD_BUG_ON(sizeof(iflags) != sizeof(dst->i_flags));
-
- dst->i_flags |= iflags & ~mask;
- if (au_test_fs_notime(dst->i_sb))
- dst->i_flags |= S_NOATIME | S_NOCMTIME;
-}
-
-void au_cpup_attr_timesizes(struct inode *inode)
-{
- struct inode *h_inode;
-
- h_inode = au_h_iptr(inode, au_ibstart(inode));
- fsstack_copy_attr_times(inode, h_inode);
- fsstack_copy_inode_size(inode, h_inode);
-}
-
-void au_cpup_attr_nlink(struct inode *inode, int force)
-{
- struct inode *h_inode;
- struct super_block *sb;
- aufs_bindex_t bindex, bend;
-
- sb = inode->i_sb;
- bindex = au_ibstart(inode);
- h_inode = au_h_iptr(inode, bindex);
- if (!force
- && !S_ISDIR(h_inode->i_mode)
- && au_opt_test(au_mntflags(sb), PLINK)
- && au_plink_test(inode))
- return;
-
- /*
- * 0 can happen in revalidating.
- * h_inode->i_mutex may not be held here, but it is harmless since once
- * i_nlink reaches 0, it will never become positive except O_TMPFILE
- * case.
- * todo: O_TMPFILE+linkat(AT_SYMLINK_FOLLOW) bypassing aufs may cause
- * the incorrect link count.
- */
- set_nlink(inode, h_inode->i_nlink);
-
- /*
- * fewer nlink makes find(1) noisy, but larger nlink doesn't.
- * it may includes whplink directory.
- */
- if (S_ISDIR(h_inode->i_mode)) {
- bend = au_ibend(inode);
- for (bindex++; bindex <= bend; bindex++) {
- h_inode = au_h_iptr(inode, bindex);
- if (h_inode)
- au_add_nlink(inode, h_inode);
- }
- }
-}
-
-void au_cpup_attr_changeable(struct inode *inode)
-{
- struct inode *h_inode;
-
- h_inode = au_h_iptr(inode, au_ibstart(inode));
- inode->i_mode = h_inode->i_mode;
- inode->i_uid = h_inode->i_uid;
- inode->i_gid = h_inode->i_gid;
- au_cpup_attr_timesizes(inode);
- au_cpup_attr_flags(inode, h_inode->i_flags);
-}
-
-void au_cpup_igen(struct inode *inode, struct inode *h_inode)
-{
- struct au_iinfo *iinfo = au_ii(inode);
-
- IiMustWriteLock(inode);
-
- iinfo->ii_higen = h_inode->i_generation;
- iinfo->ii_hsb1 = h_inode->i_sb;
-}
-
-void au_cpup_attr_all(struct inode *inode, int force)
-{
- struct inode *h_inode;
-
- h_inode = au_h_iptr(inode, au_ibstart(inode));
- au_cpup_attr_changeable(inode);
- if (inode->i_nlink > 0)
- au_cpup_attr_nlink(inode, force);
- inode->i_rdev = h_inode->i_rdev;
- inode->i_blkbits = h_inode->i_blkbits;
- au_cpup_igen(inode, h_inode);
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* Note: dt_dentry and dt_h_dentry are not dget/dput-ed */
-
-/* keep the timestamps of the parent dir when cpup */
-void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
- struct path *h_path)
-{
- struct inode *h_inode;
-
- dt->dt_dentry = dentry;
- dt->dt_h_path = *h_path;
- h_inode = d_inode(h_path->dentry);
- dt->dt_atime = h_inode->i_atime;
- dt->dt_mtime = h_inode->i_mtime;
- /* smp_mb(); */
-}
-
-void au_dtime_revert(struct au_dtime *dt)
-{
- struct iattr attr;
- int err;
-
- attr.ia_atime = dt->dt_atime;
- attr.ia_mtime = dt->dt_mtime;
- attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET
- | ATTR_ATIME | ATTR_ATIME_SET;
-
- /* no delegation since this is a directory */
- err = vfsub_notify_change(&dt->dt_h_path, &attr, /*delegated*/NULL);
- if (unlikely(err))
- pr_warn("restoring timestamps failed(%d). ignored\n", err);
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* internal use only */
-struct au_cpup_reg_attr {
- int valid;
- struct kstat st;
- unsigned int iflags; /* inode->i_flags */
-};
-
-static noinline_for_stack
-int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src,
- struct au_cpup_reg_attr *h_src_attr)
-{
- int err, sbits, icex;
- unsigned int mnt_flags;
- unsigned char verbose;
- struct iattr ia;
- struct path h_path;
- struct inode *h_isrc, *h_idst;
- struct kstat *h_st;
- struct au_branch *br;
-
- h_path.dentry = au_h_dptr(dst, bindex);
- h_idst = d_inode(h_path.dentry);
- br = au_sbr(dst->d_sb, bindex);
- h_path.mnt = au_br_mnt(br);
- h_isrc = d_inode(h_src);
- ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID
- | ATTR_ATIME | ATTR_MTIME
- | ATTR_ATIME_SET | ATTR_MTIME_SET;
- if (h_src_attr && h_src_attr->valid) {
- h_st = &h_src_attr->st;
- ia.ia_uid = h_st->uid;
- ia.ia_gid = h_st->gid;
- ia.ia_atime = h_st->atime;
- ia.ia_mtime = h_st->mtime;
- if (h_idst->i_mode != h_st->mode
- && !S_ISLNK(h_idst->i_mode)) {
- ia.ia_valid |= ATTR_MODE;
- ia.ia_mode = h_st->mode;
- }
- sbits = !!(h_st->mode & (S_ISUID | S_ISGID));
- au_cpup_attr_flags(h_idst, h_src_attr->iflags);
- } else {
- ia.ia_uid = h_isrc->i_uid;
- ia.ia_gid = h_isrc->i_gid;
- ia.ia_atime = h_isrc->i_atime;
- ia.ia_mtime = h_isrc->i_mtime;
- if (h_idst->i_mode != h_isrc->i_mode
- && !S_ISLNK(h_idst->i_mode)) {
- ia.ia_valid |= ATTR_MODE;
- ia.ia_mode = h_isrc->i_mode;
- }
- sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID));
- au_cpup_attr_flags(h_idst, h_isrc->i_flags);
- }
- /* no delegation since it is just created */
- err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
-
- /* is this nfs only? */
- if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) {
- ia.ia_valid = ATTR_FORCE | ATTR_MODE;
- ia.ia_mode = h_isrc->i_mode;
- err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
- }
-
- icex = br->br_perm & AuBrAttr_ICEX;
- if (!err) {
- mnt_flags = au_mntflags(dst->d_sb);
- verbose = !!au_opt_test(mnt_flags, VERBOSE);
- err = au_cpup_xattr(h_path.dentry, h_src, icex, verbose);
- }
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int au_do_copy_file(struct file *dst, struct file *src, loff_t len,
- char *buf, unsigned long blksize)
-{
- int err;
- size_t sz, rbytes, wbytes;
- unsigned char all_zero;
- char *p, *zp;
- struct mutex *h_mtx;
- /* reduce stack usage */
- struct iattr *ia;
-
- zp = page_address(ZERO_PAGE(0));
- if (unlikely(!zp))
- return -ENOMEM; /* possible? */
-
- err = 0;
- all_zero = 0;
- while (len) {
- AuDbg("len %lld\n", len);
- sz = blksize;
- if (len < blksize)
- sz = len;
-
- rbytes = 0;
- /* todo: signal_pending? */
- while (!rbytes || err == -EAGAIN || err == -EINTR) {
- rbytes = vfsub_read_k(src, buf, sz, &src->f_pos);
- err = rbytes;
- }
- if (unlikely(err < 0))
- break;
-
- all_zero = 0;
- if (len >= rbytes && rbytes == blksize)
- all_zero = !memcmp(buf, zp, rbytes);
- if (!all_zero) {
- wbytes = rbytes;
- p = buf;
- while (wbytes) {
- size_t b;
-
- b = vfsub_write_k(dst, p, wbytes, &dst->f_pos);
- err = b;
- /* todo: signal_pending? */
- if (unlikely(err == -EAGAIN || err == -EINTR))
- continue;
- if (unlikely(err < 0))
- break;
- wbytes -= b;
- p += b;
- }
- if (unlikely(err < 0))
- break;
- } else {
- loff_t res;
-
- AuLabel(hole);
- res = vfsub_llseek(dst, rbytes, SEEK_CUR);
- err = res;
- if (unlikely(res < 0))
- break;
- }
- len -= rbytes;
- err = 0;
- }
-
- /* the last block may be a hole */
- if (!err && all_zero) {
- AuLabel(last hole);
-
- err = 1;
- if (au_test_nfs(dst->f_path.dentry->d_sb)) {
- /* nfs requires this step to make last hole */
- /* is this only nfs? */
- do {
- /* todo: signal_pending? */
- err = vfsub_write_k(dst, "\0", 1, &dst->f_pos);
- } while (err == -EAGAIN || err == -EINTR);
- if (err == 1)
- dst->f_pos--;
- }
-
- if (err == 1) {
- ia = (void *)buf;
- ia->ia_size = dst->f_pos;
- ia->ia_valid = ATTR_SIZE | ATTR_FILE;
- ia->ia_file = dst;
- h_mtx = &file_inode(dst)->i_mutex;
- mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
- /* no delegation since it is just created */
- err = vfsub_notify_change(&dst->f_path, ia,
- /*delegated*/NULL);
- mutex_unlock(h_mtx);
- }
- }
-
- return err;
-}
-
-int au_copy_file(struct file *dst, struct file *src, loff_t len)
-{
- int err;
- unsigned long blksize;
- unsigned char do_kfree;
- char *buf;
-
- err = -ENOMEM;
- blksize = dst->f_path.dentry->d_sb->s_blocksize;
- if (!blksize || PAGE_SIZE < blksize)
- blksize = PAGE_SIZE;
- AuDbg("blksize %lu\n", blksize);
- do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr *));
- if (do_kfree)
- buf = kmalloc(blksize, GFP_NOFS);
- else
- buf = (void *)__get_free_page(GFP_NOFS);
- if (unlikely(!buf))
- goto out;
-
- if (len > (1 << 22))
- AuDbg("copying a large file %lld\n", (long long)len);
-
- src->f_pos = 0;
- dst->f_pos = 0;
- err = au_do_copy_file(dst, src, len, buf, blksize);
- if (do_kfree)
- kfree(buf);
- else
- free_page((unsigned long)buf);
-
-out:
- return err;
-}
-
-/*
- * to support a sparse file which is opened with O_APPEND,
- * we need to close the file.
- */
-static int au_cp_regular(struct au_cp_generic *cpg)
-{
- int err, i;
- enum { SRC, DST };
- struct {
- aufs_bindex_t bindex;
- unsigned int flags;
- struct dentry *dentry;
- int force_wr;
- struct file *file;
- void *label;
- } *f, file[] = {
- {
- .bindex = cpg->bsrc,
- .flags = O_RDONLY | O_NOATIME | O_LARGEFILE,
- .label = &&out
- },
- {
- .bindex = cpg->bdst,
- .flags = O_WRONLY | O_NOATIME | O_LARGEFILE,
- .force_wr = !!au_ftest_cpup(cpg->flags, RWDST),
- .label = &&out_src
- }
- };
- struct super_block *sb;
- struct task_struct *tsk = current;
-
- /* bsrc branch can be ro/rw. */
- sb = cpg->dentry->d_sb;
- f = file;
- for (i = 0; i < 2; i++, f++) {
- f->dentry = au_h_dptr(cpg->dentry, f->bindex);
- f->file = au_h_open(cpg->dentry, f->bindex, f->flags,
- /*file*/NULL, f->force_wr);
- err = PTR_ERR(f->file);
- if (IS_ERR(f->file))
- goto *f->label;
- }
-
- /* try stopping to update while we copyup */
- IMustLock(d_inode(file[SRC].dentry));
- err = au_copy_file(file[DST].file, file[SRC].file, cpg->len);
-
- /* i wonder if we had O_NO_DELAY_FPUT flag */
- if (tsk->flags & PF_KTHREAD)
- __fput_sync(file[DST].file);
- else {
- WARN(1, "%pD\nPlease report this warning to aufs-users ML",
- file[DST].file);
- fput(file[DST].file);
- /*
- * too bad.
- * we have to call both since we don't know which place the file
- * was added to.
- */
- task_work_run();
- flush_delayed_fput();
- }
- au_sbr_put(sb, file[DST].bindex);
-
-out_src:
- fput(file[SRC].file);
- au_sbr_put(sb, file[SRC].bindex);
-out:
- return err;
-}
-
-static int au_do_cpup_regular(struct au_cp_generic *cpg,
- struct au_cpup_reg_attr *h_src_attr)
-{
- int err, rerr;
- loff_t l;
- struct path h_path;
- struct inode *h_src_inode, *h_dst_inode;
-
- err = 0;
- h_src_inode = au_h_iptr(d_inode(cpg->dentry), cpg->bsrc);
- l = i_size_read(h_src_inode);
- if (cpg->len == -1 || l < cpg->len)
- cpg->len = l;
- if (cpg->len) {
- /* try stopping to update while we are referencing */
- mutex_lock_nested(&h_src_inode->i_mutex, AuLsc_I_CHILD);
- au_pin_hdir_unlock(cpg->pin);
-
- h_path.dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
- h_path.mnt = au_sbr_mnt(cpg->dentry->d_sb, cpg->bsrc);
- h_src_attr->iflags = h_src_inode->i_flags;
- if (!au_test_nfs(h_src_inode->i_sb))
- err = vfs_getattr(&h_path, &h_src_attr->st);
- else {
- mutex_unlock(&h_src_inode->i_mutex);
- err = vfs_getattr(&h_path, &h_src_attr->st);
- mutex_lock_nested(&h_src_inode->i_mutex, AuLsc_I_CHILD);
- }
- if (unlikely(err)) {
- mutex_unlock(&h_src_inode->i_mutex);
- goto out;
- }
- h_src_attr->valid = 1;
- err = au_cp_regular(cpg);
- mutex_unlock(&h_src_inode->i_mutex);
- rerr = au_pin_hdir_relock(cpg->pin);
- if (!err && rerr)
- err = rerr;
- }
- if (!err && (h_src_inode->i_state & I_LINKABLE)) {
- h_path.dentry = au_h_dptr(cpg->dentry, cpg->bdst);
- h_dst_inode = d_inode(h_path.dentry);
- spin_lock(&h_dst_inode->i_lock);
- h_dst_inode->i_state |= I_LINKABLE;
- spin_unlock(&h_dst_inode->i_lock);
- }
-
-out:
- return err;
-}
-
-static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src,
- struct inode *h_dir)
-{
- int err, symlen;
- mm_segment_t old_fs;
- union {
- char *k;
- char __user *u;
- } sym;
- struct inode *h_inode = d_inode(h_src);
- const struct inode_operations *h_iop = h_inode->i_op;
-
- err = -ENOSYS;
- if (unlikely(!h_iop->readlink))
- goto out;
-
- err = -ENOMEM;
- sym.k = (void *)__get_free_page(GFP_NOFS);
- if (unlikely(!sym.k))
- goto out;
-
- /* unnecessary to support mmap_sem since symlink is not mmap-able */
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- symlen = h_iop->readlink(h_src, sym.u, PATH_MAX);
- err = symlen;
- set_fs(old_fs);
-
- if (symlen > 0) {
- sym.k[symlen] = 0;
- err = vfsub_symlink(h_dir, h_path, sym.k);
- }
- free_page((unsigned long)sym.k);
-
-out:
- return err;
-}
-
-/*
- * regardless 'acl' option, reset all ACL.
- * All ACL will be copied up later from the original entry on the lower branch.
- */
-static int au_reset_acl(struct inode *h_dir, struct path *h_path, umode_t mode)
-{
- int err;
- struct dentry *h_dentry;
- struct inode *h_inode;
-
- h_dentry = h_path->dentry;
- h_inode = d_inode(h_dentry);
- /* forget_all_cached_acls(h_inode)); */
- err = vfsub_removexattr(h_dentry, XATTR_NAME_POSIX_ACL_ACCESS);
- AuTraceErr(err);
- if (err == -EOPNOTSUPP)
- err = 0;
- if (!err)
- err = vfsub_acl_chmod(h_inode, mode);
-
- AuTraceErr(err);
- return err;
-}
-
-static int au_do_cpup_dir(struct au_cp_generic *cpg, struct dentry *dst_parent,
- struct inode *h_dir, struct path *h_path)
-{
- int err;
- struct inode *dir, *inode;
-
- err = vfsub_removexattr(h_path->dentry, XATTR_NAME_POSIX_ACL_DEFAULT);
- AuTraceErr(err);
- if (err == -EOPNOTSUPP)
- err = 0;
- if (unlikely(err))
- goto out;
-
- /*
- * strange behaviour from the users view,
- * particularry setattr case
- */
- dir = d_inode(dst_parent);
- if (au_ibstart(dir) == cpg->bdst)
- au_cpup_attr_nlink(dir, /*force*/1);
- inode = d_inode(cpg->dentry);
- au_cpup_attr_nlink(inode, /*force*/1);
-
-out:
- return err;
-}
-
-static noinline_for_stack
-int cpup_entry(struct au_cp_generic *cpg, struct dentry *dst_parent,
- struct au_cpup_reg_attr *h_src_attr)
-{
- int err;
- umode_t mode;
- unsigned int mnt_flags;
- unsigned char isdir, isreg, force;
- const unsigned char do_dt = !!au_ftest_cpup(cpg->flags, DTIME);
- struct au_dtime dt;
- struct path h_path;
- struct dentry *h_src, *h_dst, *h_parent;
- struct inode *h_inode, *h_dir;
- struct super_block *sb;
-
- /* bsrc branch can be ro/rw. */
- h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
- h_inode = d_inode(h_src);
- AuDebugOn(h_inode != au_h_iptr(d_inode(cpg->dentry), cpg->bsrc));
-
- /* try stopping to be referenced while we are creating */
- h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
- if (au_ftest_cpup(cpg->flags, RENAME))
- AuDebugOn(strncmp(h_dst->d_name.name, AUFS_WH_PFX,
- AUFS_WH_PFX_LEN));
- h_parent = h_dst->d_parent; /* dir inode is locked */
- h_dir = d_inode(h_parent);
- IMustLock(h_dir);
- AuDebugOn(h_parent != h_dst->d_parent);
-
- sb = cpg->dentry->d_sb;
- h_path.mnt = au_sbr_mnt(sb, cpg->bdst);
- if (do_dt) {
- h_path.dentry = h_parent;
- au_dtime_store(&dt, dst_parent, &h_path);
- }
- h_path.dentry = h_dst;
-
- isreg = 0;
- isdir = 0;
- mode = h_inode->i_mode;
- switch (mode & S_IFMT) {
- case S_IFREG:
- isreg = 1;
- err = vfsub_create(h_dir, &h_path, S_IRUSR | S_IWUSR,
- /*want_excl*/true);
- if (!err)
- err = au_do_cpup_regular(cpg, h_src_attr);
- break;
- case S_IFDIR:
- isdir = 1;
- err = vfsub_mkdir(h_dir, &h_path, mode);
- if (!err)
- err = au_do_cpup_dir(cpg, dst_parent, h_dir, &h_path);
- break;
- case S_IFLNK:
- err = au_do_cpup_symlink(&h_path, h_src, h_dir);
- break;
- case S_IFCHR:
- case S_IFBLK:
- AuDebugOn(!capable(CAP_MKNOD));
- /*FALLTHROUGH*/
- case S_IFIFO:
- case S_IFSOCK:
- err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev);
- break;
- default:
- AuIOErr("Unknown inode type 0%o\n", mode);
- err = -EIO;
- }
- if (!err)
- err = au_reset_acl(h_dir, &h_path, mode);
-
- mnt_flags = au_mntflags(sb);
- if (!au_opt_test(mnt_flags, UDBA_NONE)
- && !isdir
- && au_opt_test(mnt_flags, XINO)
- && (h_inode->i_nlink == 1
- || (h_inode->i_state & I_LINKABLE))
- /* todo: unnecessary? */
- /* && d_inode(cpg->dentry)->i_nlink == 1 */
- && cpg->bdst < cpg->bsrc
- && !au_ftest_cpup(cpg->flags, KEEPLINO))
- au_xino_write(sb, cpg->bsrc, h_inode->i_ino, /*ino*/0);
- /* ignore this error */
-
- if (!err) {
- force = 0;
- if (isreg) {
- force = !!cpg->len;
- if (cpg->len == -1)
- force = !!i_size_read(h_inode);
- }
- au_fhsm_wrote(sb, cpg->bdst, force);
- }
-
- if (do_dt)
- au_dtime_revert(&dt);
- return err;
-}
-
-static int au_do_ren_after_cpup(struct au_cp_generic *cpg, struct path *h_path)
-{
- int err;
- struct dentry *dentry, *h_dentry, *h_parent, *parent;
- struct inode *h_dir;
- aufs_bindex_t bdst;
-
- dentry = cpg->dentry;
- bdst = cpg->bdst;
- h_dentry = au_h_dptr(dentry, bdst);
- if (!au_ftest_cpup(cpg->flags, OVERWRITE)) {
- dget(h_dentry);
- au_set_h_dptr(dentry, bdst, NULL);
- err = au_lkup_neg(dentry, bdst, /*wh*/0);
- if (!err)
- h_path->dentry = dget(au_h_dptr(dentry, bdst));
- au_set_h_dptr(dentry, bdst, h_dentry);
- } else {
- err = 0;
- parent = dget_parent(dentry);
- h_parent = au_h_dptr(parent, bdst);
- dput(parent);
- h_path->dentry = vfsub_lkup_one(&dentry->d_name, h_parent);
- if (IS_ERR(h_path->dentry))
- err = PTR_ERR(h_path->dentry);
- }
- if (unlikely(err))
- goto out;
-
- h_parent = h_dentry->d_parent; /* dir inode is locked */
- h_dir = d_inode(h_parent);
- IMustLock(h_dir);
- AuDbg("%pd %pd\n", h_dentry, h_path->dentry);
- /* no delegation since it is just created */
- err = vfsub_rename(h_dir, h_dentry, h_dir, h_path, /*delegated*/NULL);
- dput(h_path->dentry);
-
-out:
- return err;
-}
-
-/*
- * copyup the @dentry from @bsrc to @bdst.
- * the caller must set the both of lower dentries.
- * @len is for truncating when it is -1 copyup the entire file.
- * in link/rename cases, @dst_parent may be different from the real one.
- * basic->bsrc can be larger than basic->bdst.
- */
-static int au_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
-{
- int err, rerr;
- aufs_bindex_t old_ibstart;
- unsigned char isdir, plink;
- struct dentry *h_src, *h_dst, *h_parent;
- struct inode *dst_inode, *h_dir, *inode, *delegated, *src_inode;
- struct super_block *sb;
- struct au_branch *br;
- /* to reuduce stack size */
- struct {
- struct au_dtime dt;
- struct path h_path;
- struct au_cpup_reg_attr h_src_attr;
- } *a;
-
- err = -ENOMEM;
- a = kmalloc(sizeof(*a), GFP_NOFS);
- if (unlikely(!a))
- goto out;
- a->h_src_attr.valid = 0;
-
- sb = cpg->dentry->d_sb;
- br = au_sbr(sb, cpg->bdst);
- a->h_path.mnt = au_br_mnt(br);
- h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
- h_parent = h_dst->d_parent; /* dir inode is locked */
- h_dir = d_inode(h_parent);
- IMustLock(h_dir);
-
- h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
- inode = d_inode(cpg->dentry);
-
- if (!dst_parent)
- dst_parent = dget_parent(cpg->dentry);
- else
- dget(dst_parent);
-
- plink = !!au_opt_test(au_mntflags(sb), PLINK);
- dst_inode = au_h_iptr(inode, cpg->bdst);
- if (dst_inode) {
- if (unlikely(!plink)) {
- err = -EIO;
- AuIOErr("hi%lu(i%lu) exists on b%d "
- "but plink is disabled\n",
- dst_inode->i_ino, inode->i_ino, cpg->bdst);
- goto out_parent;
- }
-
- if (dst_inode->i_nlink) {
- const int do_dt = au_ftest_cpup(cpg->flags, DTIME);
-
- h_src = au_plink_lkup(inode, cpg->bdst);
- err = PTR_ERR(h_src);
- if (IS_ERR(h_src))
- goto out_parent;
- if (unlikely(d_is_negative(h_src))) {
- err = -EIO;
- AuIOErr("i%lu exists on b%d "
- "but not pseudo-linked\n",
- inode->i_ino, cpg->bdst);
- dput(h_src);
- goto out_parent;
- }
-
- if (do_dt) {
- a->h_path.dentry = h_parent;
- au_dtime_store(&a->dt, dst_parent, &a->h_path);
- }
-
- a->h_path.dentry = h_dst;
- delegated = NULL;
- err = vfsub_link(h_src, h_dir, &a->h_path, &delegated);
- if (!err && au_ftest_cpup(cpg->flags, RENAME))
- err = au_do_ren_after_cpup(cpg, &a->h_path);
- if (do_dt)
- au_dtime_revert(&a->dt);
- if (unlikely(err == -EWOULDBLOCK)) {
- pr_warn("cannot retry for NFSv4 delegation"
- " for an internal link\n");
- iput(delegated);
- }
- dput(h_src);
- goto out_parent;
- } else
- /* todo: cpup_wh_file? */
- /* udba work */
- au_update_ibrange(inode, /*do_put_zero*/1);
- }
-
- isdir = S_ISDIR(inode->i_mode);
- old_ibstart = au_ibstart(inode);
- err = cpup_entry(cpg, dst_parent, &a->h_src_attr);
- if (unlikely(err))
- goto out_rev;
- dst_inode = d_inode(h_dst);
- mutex_lock_nested(&dst_inode->i_mutex, AuLsc_I_CHILD2);
- /* todo: necessary? */
- /* au_pin_hdir_unlock(cpg->pin); */
-
- err = cpup_iattr(cpg->dentry, cpg->bdst, h_src, &a->h_src_attr);
- if (unlikely(err)) {
- /* todo: necessary? */
- /* au_pin_hdir_relock(cpg->pin); */ /* ignore an error */
- mutex_unlock(&dst_inode->i_mutex);
- goto out_rev;
- }
-
- if (cpg->bdst < old_ibstart) {
- if (S_ISREG(inode->i_mode)) {
- err = au_dy_iaop(inode, cpg->bdst, dst_inode);
- if (unlikely(err)) {
- /* ignore an error */
- /* au_pin_hdir_relock(cpg->pin); */
- mutex_unlock(&dst_inode->i_mutex);
- goto out_rev;
- }
- }
- au_set_ibstart(inode, cpg->bdst);
- } else
- au_set_ibend(inode, cpg->bdst);
- au_set_h_iptr(inode, cpg->bdst, au_igrab(dst_inode),
- au_hi_flags(inode, isdir));
-
- /* todo: necessary? */
- /* err = au_pin_hdir_relock(cpg->pin); */
- mutex_unlock(&dst_inode->i_mutex);
- if (unlikely(err))
- goto out_rev;
-
- src_inode = d_inode(h_src);
- if (!isdir
- && (src_inode->i_nlink > 1
- || src_inode->i_state & I_LINKABLE)
- && plink)
- au_plink_append(inode, cpg->bdst, h_dst);
-
- if (au_ftest_cpup(cpg->flags, RENAME)) {
- a->h_path.dentry = h_dst;
- err = au_do_ren_after_cpup(cpg, &a->h_path);
- }
- if (!err)
- goto out_parent; /* success */
-
- /* revert */
-out_rev:
- a->h_path.dentry = h_parent;
- au_dtime_store(&a->dt, dst_parent, &a->h_path);
- a->h_path.dentry = h_dst;
- rerr = 0;
- if (d_is_positive(h_dst)) {
- if (!isdir) {
- /* no delegation since it is just created */
- rerr = vfsub_unlink(h_dir, &a->h_path,
- /*delegated*/NULL, /*force*/0);
- } else
- rerr = vfsub_rmdir(h_dir, &a->h_path);
- }
- au_dtime_revert(&a->dt);
- if (rerr) {
- AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr);
- err = -EIO;
- }
-out_parent:
- dput(dst_parent);
- kfree(a);
-out:
- return err;
-}
-
-#if 0 /* reserved */
-struct au_cpup_single_args {
- int *errp;
- struct au_cp_generic *cpg;
- struct dentry *dst_parent;
-};
-
-static void au_call_cpup_single(void *args)
-{
- struct au_cpup_single_args *a = args;
-
- au_pin_hdir_acquire_nest(a->cpg->pin);
- *a->errp = au_cpup_single(a->cpg, a->dst_parent);
- au_pin_hdir_release(a->cpg->pin);
-}
-#endif
-
-/*
- * prevent SIGXFSZ in copy-up.
- * testing CAP_MKNOD is for generic fs,
- * but CAP_FSETID is for xfs only, currently.
- */
-static int au_cpup_sio_test(struct au_pin *pin, umode_t mode)
-{
- int do_sio;
- struct super_block *sb;
- struct inode *h_dir;
-
- do_sio = 0;
- sb = au_pinned_parent(pin)->d_sb;
- if (!au_wkq_test()
- && (!au_sbi(sb)->si_plink_maint_pid
- || au_plink_maint(sb, AuLock_NOPLM))) {
- switch (mode & S_IFMT) {
- case S_IFREG:
- /* no condition about RLIMIT_FSIZE and the file size */
- do_sio = 1;
- break;
- case S_IFCHR:
- case S_IFBLK:
- do_sio = !capable(CAP_MKNOD);
- break;
- }
- if (!do_sio)
- do_sio = ((mode & (S_ISUID | S_ISGID))
- && !capable(CAP_FSETID));
- /* this workaround may be removed in the future */
- if (!do_sio) {
- h_dir = au_pinned_h_dir(pin);
- do_sio = h_dir->i_mode & S_ISVTX;
- }
- }
-
- return do_sio;
-}
-
-#if 0 /* reserved */
-int au_sio_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
-{
- int err, wkq_err;
- struct dentry *h_dentry;
-
- h_dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
- if (!au_cpup_sio_test(pin, d_inode(h_dentry)->i_mode))
- err = au_cpup_single(cpg, dst_parent);
- else {
- struct au_cpup_single_args args = {
- .errp = &err,
- .cpg = cpg,
- .dst_parent = dst_parent
- };
- wkq_err = au_wkq_wait(au_call_cpup_single, &args);
- if (unlikely(wkq_err))
- err = wkq_err;
- }
-
- return err;
-}
-#endif
-
-/*
- * copyup the @dentry from the first active lower branch to @bdst,
- * using au_cpup_single().
- */
-static int au_cpup_simple(struct au_cp_generic *cpg)
-{
- int err;
- unsigned int flags_orig;
- struct dentry *dentry;
-
- AuDebugOn(cpg->bsrc < 0);
-
- dentry = cpg->dentry;
- DiMustWriteLock(dentry);
-
- err = au_lkup_neg(dentry, cpg->bdst, /*wh*/1);
- if (!err) {
- flags_orig = cpg->flags;
- au_fset_cpup(cpg->flags, RENAME);
- err = au_cpup_single(cpg, NULL);
- cpg->flags = flags_orig;
- if (!err)
- return 0; /* success */
-
- /* revert */
- au_set_h_dptr(dentry, cpg->bdst, NULL);
- au_set_dbstart(dentry, cpg->bsrc);
- }
-
- return err;
-}
-
-struct au_cpup_simple_args {
- int *errp;
- struct au_cp_generic *cpg;
-};
-
-static void au_call_cpup_simple(void *args)
-{
- struct au_cpup_simple_args *a = args;
-
- au_pin_hdir_acquire_nest(a->cpg->pin);
- *a->errp = au_cpup_simple(a->cpg);
- au_pin_hdir_release(a->cpg->pin);
-}
-
-static int au_do_sio_cpup_simple(struct au_cp_generic *cpg)
-{
- int err, wkq_err;
- struct dentry *dentry, *parent;
- struct file *h_file;
- struct inode *h_dir;
-
- dentry = cpg->dentry;
- h_file = NULL;
- if (au_ftest_cpup(cpg->flags, HOPEN)) {
- AuDebugOn(cpg->bsrc < 0);
- h_file = au_h_open_pre(dentry, cpg->bsrc, /*force_wr*/0);
- err = PTR_ERR(h_file);
- if (IS_ERR(h_file))
- goto out;
- }
-
- parent = dget_parent(dentry);
- h_dir = au_h_iptr(d_inode(parent), cpg->bdst);
- if (!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE)
- && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
- err = au_cpup_simple(cpg);
- else {
- struct au_cpup_simple_args args = {
- .errp = &err,
- .cpg = cpg
- };
- wkq_err = au_wkq_wait(au_call_cpup_simple, &args);
- if (unlikely(wkq_err))
- err = wkq_err;
- }
-
- dput(parent);
- if (h_file)
- au_h_open_post(dentry, cpg->bsrc, h_file);
-
-out:
- return err;
-}
-
-int au_sio_cpup_simple(struct au_cp_generic *cpg)
-{
- aufs_bindex_t bsrc, bend;
- struct dentry *dentry, *h_dentry;
-
- if (cpg->bsrc < 0) {
- dentry = cpg->dentry;
- bend = au_dbend(dentry);
- for (bsrc = cpg->bdst + 1; bsrc <= bend; bsrc++) {
- h_dentry = au_h_dptr(dentry, bsrc);
- if (h_dentry) {
- AuDebugOn(d_is_negative(h_dentry));
- break;
- }
- }
- AuDebugOn(bsrc > bend);
- cpg->bsrc = bsrc;
- }
- AuDebugOn(cpg->bsrc <= cpg->bdst);
- return au_do_sio_cpup_simple(cpg);
-}
-
-int au_sio_cpdown_simple(struct au_cp_generic *cpg)
-{
- AuDebugOn(cpg->bdst <= cpg->bsrc);
- return au_do_sio_cpup_simple(cpg);
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * copyup the deleted file for writing.
- */
-static int au_do_cpup_wh(struct au_cp_generic *cpg, struct dentry *wh_dentry,
- struct file *file)
-{
- int err;
- unsigned int flags_orig;
- aufs_bindex_t bsrc_orig;
- struct dentry *h_d_dst, *h_d_start;
- struct au_dinfo *dinfo;
- struct au_hdentry *hdp;
-
- dinfo = au_di(cpg->dentry);
- AuRwMustWriteLock(&dinfo->di_rwsem);
-
- bsrc_orig = cpg->bsrc;
- cpg->bsrc = dinfo->di_bstart;
- hdp = dinfo->di_hdentry;
- h_d_dst = hdp[0 + cpg->bdst].hd_dentry;
- dinfo->di_bstart = cpg->bdst;
- hdp[0 + cpg->bdst].hd_dentry = wh_dentry;
- h_d_start = NULL;
- if (file) {
- h_d_start = hdp[0 + cpg->bsrc].hd_dentry;
- hdp[0 + cpg->bsrc].hd_dentry = au_hf_top(file)->f_path.dentry;
- }
- flags_orig = cpg->flags;
- cpg->flags = !AuCpup_DTIME;
- err = au_cpup_single(cpg, /*h_parent*/NULL);
- cpg->flags = flags_orig;
- if (file) {
- if (!err)
- err = au_reopen_nondir(file);
- hdp[0 + cpg->bsrc].hd_dentry = h_d_start;
- }
- hdp[0 + cpg->bdst].hd_dentry = h_d_dst;
- dinfo->di_bstart = cpg->bsrc;
- cpg->bsrc = bsrc_orig;
-
- return err;
-}
-
-static int au_cpup_wh(struct au_cp_generic *cpg, struct file *file)
-{
- int err;
- aufs_bindex_t bdst;
- struct au_dtime dt;
- struct dentry *dentry, *parent, *h_parent, *wh_dentry;
- struct au_branch *br;
- struct path h_path;
-
- dentry = cpg->dentry;
- bdst = cpg->bdst;
- br = au_sbr(dentry->d_sb, bdst);
- parent = dget_parent(dentry);
- h_parent = au_h_dptr(parent, bdst);
- wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
- err = PTR_ERR(wh_dentry);
- if (IS_ERR(wh_dentry))
- goto out;
-
- h_path.dentry = h_parent;
- h_path.mnt = au_br_mnt(br);
- au_dtime_store(&dt, parent, &h_path);
- err = au_do_cpup_wh(cpg, wh_dentry, file);
- if (unlikely(err))
- goto out_wh;
-
- dget(wh_dentry);
- h_path.dentry = wh_dentry;
- if (!d_is_dir(wh_dentry)) {
- /* no delegation since it is just created */
- err = vfsub_unlink(d_inode(h_parent), &h_path,
- /*delegated*/NULL, /*force*/0);
- } else
- err = vfsub_rmdir(d_inode(h_parent), &h_path);
- if (unlikely(err)) {
- AuIOErr("failed remove copied-up tmp file %pd(%d)\n",
- wh_dentry, err);
- err = -EIO;
- }
- au_dtime_revert(&dt);
- au_set_hi_wh(d_inode(dentry), bdst, wh_dentry);
-
-out_wh:
- dput(wh_dentry);
-out:
- dput(parent);
- return err;
-}
-
-struct au_cpup_wh_args {
- int *errp;
- struct au_cp_generic *cpg;
- struct file *file;
-};
-
-static void au_call_cpup_wh(void *args)
-{
- struct au_cpup_wh_args *a = args;
-
- au_pin_hdir_acquire_nest(a->cpg->pin);
- *a->errp = au_cpup_wh(a->cpg, a->file);
- au_pin_hdir_release(a->cpg->pin);
-}
-
-int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file)
-{
- int err, wkq_err;
- aufs_bindex_t bdst;
- struct dentry *dentry, *parent, *h_orph, *h_parent;
- struct inode *dir, *h_dir, *h_tmpdir;
- struct au_wbr *wbr;
- struct au_pin wh_pin, *pin_orig;
-
- dentry = cpg->dentry;
- bdst = cpg->bdst;
- parent = dget_parent(dentry);
- dir = d_inode(parent);
- h_orph = NULL;
- h_parent = NULL;
- h_dir = au_igrab(au_h_iptr(dir, bdst));
- h_tmpdir = h_dir;
- pin_orig = NULL;
- if (!h_dir->i_nlink) {
- wbr = au_sbr(dentry->d_sb, bdst)->br_wbr;
- h_orph = wbr->wbr_orph;
-
- h_parent = dget(au_h_dptr(parent, bdst));
- au_set_h_dptr(parent, bdst, dget(h_orph));
- h_tmpdir = d_inode(h_orph);
- au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0);
-
- mutex_lock_nested(&h_tmpdir->i_mutex, AuLsc_I_PARENT3);
- /* todo: au_h_open_pre()? */
-
- pin_orig = cpg->pin;
- au_pin_init(&wh_pin, dentry, bdst, AuLsc_DI_PARENT,
- AuLsc_I_PARENT3, cpg->pin->udba, AuPin_DI_LOCKED);
- cpg->pin = &wh_pin;
- }
-
- if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE)
- && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
- err = au_cpup_wh(cpg, file);
- else {
- struct au_cpup_wh_args args = {
- .errp = &err,
- .cpg = cpg,
- .file = file
- };
- wkq_err = au_wkq_wait(au_call_cpup_wh, &args);
- if (unlikely(wkq_err))
- err = wkq_err;
- }
-
- if (h_orph) {
- mutex_unlock(&h_tmpdir->i_mutex);
- /* todo: au_h_open_post()? */
- au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0);
- au_set_h_dptr(parent, bdst, h_parent);
- AuDebugOn(!pin_orig);
- cpg->pin = pin_orig;
- }
- iput(h_dir);
- dput(parent);
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * generic routine for both of copy-up and copy-down.
- */
-/* cf. revalidate function in file.c */
-int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
- int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
- struct au_pin *pin,
- struct dentry *h_parent, void *arg),
- void *arg)
-{
- int err;
- struct au_pin pin;
- struct dentry *d, *parent, *h_parent, *real_parent, *h_dentry;
-
- err = 0;
- parent = dget_parent(dentry);
- if (IS_ROOT(parent))
- goto out;
-
- au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2,
- au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE);
-
- /* do not use au_dpage */
- real_parent = parent;
- while (1) {
- dput(parent);
- parent = dget_parent(dentry);
- h_parent = au_h_dptr(parent, bdst);
- if (h_parent)
- goto out; /* success */
-
- /* find top dir which is necessary to cpup */
- do {
- d = parent;
- dput(parent);
- parent = dget_parent(d);
- di_read_lock_parent3(parent, !AuLock_IR);
- h_parent = au_h_dptr(parent, bdst);
- di_read_unlock(parent, !AuLock_IR);
- } while (!h_parent);
-
- if (d != real_parent)
- di_write_lock_child3(d);
-
- /* somebody else might create while we were sleeping */
- h_dentry = au_h_dptr(d, bdst);
- if (!h_dentry || d_is_negative(h_dentry)) {
- if (h_dentry)
- au_update_dbstart(d);
-
- au_pin_set_dentry(&pin, d);
- err = au_do_pin(&pin);
- if (!err) {
- err = cp(d, bdst, &pin, h_parent, arg);
- au_unpin(&pin);
- }
- }
-
- if (d != real_parent)
- di_write_unlock(d);
- if (unlikely(err))
- break;
- }
-
-out:
- dput(parent);
- return err;
-}
-
-static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst,
- struct au_pin *pin,
- struct dentry *h_parent __maybe_unused,
- void *arg __maybe_unused)
-{
- struct au_cp_generic cpg = {
- .dentry = dentry,
- .bdst = bdst,
- .bsrc = -1,
- .len = 0,
- .pin = pin,
- .flags = AuCpup_DTIME
- };
- return au_sio_cpup_simple(&cpg);
-}
-
-int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
-{
- return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL);
-}
-
-int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
-{
- int err;
- struct dentry *parent;
- struct inode *dir;
-
- parent = dget_parent(dentry);
- dir = d_inode(parent);
- err = 0;
- if (au_h_iptr(dir, bdst))
- goto out;
-
- di_read_unlock(parent, AuLock_IR);
- di_write_lock_parent(parent);
- /* someone else might change our inode while we were sleeping */
- if (!au_h_iptr(dir, bdst))
- err = au_cpup_dirs(dentry, bdst);
- di_downgrade_lock(parent, AuLock_IR);
-
-out:
- dput(parent);
- return err;
-}
diff --git a/fs/aufs/cpup.h b/fs/aufs/cpup.h
deleted file mode 100644
index ccba2c427..000000000
--- a/fs/aufs/cpup.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * copy-up/down functions
- */
-
-#ifndef __AUFS_CPUP_H__
-#define __AUFS_CPUP_H__
-
-#ifdef __KERNEL__
-
-#include <linux/path.h>
-
-struct inode;
-struct file;
-struct au_pin;
-
-void au_cpup_attr_flags(struct inode *dst, unsigned int iflags);
-void au_cpup_attr_timesizes(struct inode *inode);
-void au_cpup_attr_nlink(struct inode *inode, int force);
-void au_cpup_attr_changeable(struct inode *inode);
-void au_cpup_igen(struct inode *inode, struct inode *h_inode);
-void au_cpup_attr_all(struct inode *inode, int force);
-
-/* ---------------------------------------------------------------------- */
-
-struct au_cp_generic {
- struct dentry *dentry;
- aufs_bindex_t bdst, bsrc;
- loff_t len;
- struct au_pin *pin;
- unsigned int flags;
-};
-
-/* cpup flags */
-#define AuCpup_DTIME 1 /* do dtime_store/revert */
-#define AuCpup_KEEPLINO (1 << 1) /* do not clear the lower xino,
- for link(2) */
-#define AuCpup_RENAME (1 << 2) /* rename after cpup */
-#define AuCpup_HOPEN (1 << 3) /* call h_open_pre/post() in
- cpup */
-#define AuCpup_OVERWRITE (1 << 4) /* allow overwriting the
- existing entry */
-#define AuCpup_RWDST (1 << 5) /* force write target even if
- the branch is marked as RO */
-
-#define au_ftest_cpup(flags, name) ((flags) & AuCpup_##name)
-#define au_fset_cpup(flags, name) \
- do { (flags) |= AuCpup_##name; } while (0)
-#define au_fclr_cpup(flags, name) \
- do { (flags) &= ~AuCpup_##name; } while (0)
-
-int au_copy_file(struct file *dst, struct file *src, loff_t len);
-int au_sio_cpup_simple(struct au_cp_generic *cpg);
-int au_sio_cpdown_simple(struct au_cp_generic *cpg);
-int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file);
-
-int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
- int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
- struct au_pin *pin,
- struct dentry *h_parent, void *arg),
- void *arg);
-int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
-int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
-
-/* ---------------------------------------------------------------------- */
-
-/* keep timestamps when copyup */
-struct au_dtime {
- struct dentry *dt_dentry;
- struct path dt_h_path;
- struct timespec dt_atime, dt_mtime;
-};
-void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
- struct path *h_path);
-void au_dtime_revert(struct au_dtime *dt);
-
-#endif /* __KERNEL__ */
-#endif /* __AUFS_CPUP_H__ */
diff --git a/fs/aufs/dbgaufs.c b/fs/aufs/dbgaufs.c
deleted file mode 100644
index 0aefb5ed8..000000000
--- a/fs/aufs/dbgaufs.c
+++ /dev/null
@@ -1,419 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * debugfs interface
- */
-
-#include <linux/debugfs.h>
-#include "aufs.h"
-
-#ifndef CONFIG_SYSFS
-#error DEBUG_FS depends upon SYSFS
-#endif
-
-static struct dentry *dbgaufs;
-static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH;
-
-/* 20 is max digits length of ulong 64 */
-struct dbgaufs_arg {
- int n;
- char a[20 * 4];
-};
-
-/*
- * common function for all XINO files
- */
-static int dbgaufs_xi_release(struct inode *inode __maybe_unused,
- struct file *file)
-{
- kfree(file->private_data);
- return 0;
-}
-
-static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt)
-{
- int err;
- struct kstat st;
- struct dbgaufs_arg *p;
-
- err = -ENOMEM;
- p = kmalloc(sizeof(*p), GFP_NOFS);
- if (unlikely(!p))
- goto out;
-
- err = 0;
- p->n = 0;
- file->private_data = p;
- if (!xf)
- goto out;
-
- err = vfs_getattr(&xf->f_path, &st);
- if (!err) {
- if (do_fcnt)
- p->n = snprintf
- (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n",
- (long)file_count(xf), st.blocks, st.blksize,
- (long long)st.size);
- else
- p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n",
- st.blocks, st.blksize,
- (long long)st.size);
- AuDebugOn(p->n >= sizeof(p->a));
- } else {
- p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err);
- err = 0;
- }
-
-out:
- return err;
-
-}
-
-static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct dbgaufs_arg *p;
-
- p = file->private_data;
- return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
-}
-
-/* ---------------------------------------------------------------------- */
-
-struct dbgaufs_plink_arg {
- int n;
- char a[];
-};
-
-static int dbgaufs_plink_release(struct inode *inode __maybe_unused,
- struct file *file)
-{
- free_page((unsigned long)file->private_data);
- return 0;
-}
-
-static int dbgaufs_plink_open(struct inode *inode, struct file *file)
-{
- int err, i, limit;
- unsigned long n, sum;
- struct dbgaufs_plink_arg *p;
- struct au_sbinfo *sbinfo;
- struct super_block *sb;
- struct au_sphlhead *sphl;
-
- err = -ENOMEM;
- p = (void *)get_zeroed_page(GFP_NOFS);
- if (unlikely(!p))
- goto out;
-
- err = -EFBIG;
- sbinfo = inode->i_private;
- sb = sbinfo->si_sb;
- si_noflush_read_lock(sb);
- if (au_opt_test(au_mntflags(sb), PLINK)) {
- limit = PAGE_SIZE - sizeof(p->n);
-
- /* the number of buckets */
- n = snprintf(p->a + p->n, limit, "%d\n", AuPlink_NHASH);
- p->n += n;
- limit -= n;
-
- sum = 0;
- for (i = 0, sphl = sbinfo->si_plink;
- i < AuPlink_NHASH;
- i++, sphl++) {
- n = au_sphl_count(sphl);
- sum += n;
-
- n = snprintf(p->a + p->n, limit, "%lu ", n);
- p->n += n;
- limit -= n;
- if (unlikely(limit <= 0))
- goto out_free;
- }
- p->a[p->n - 1] = '\n';
-
- /* the sum of plinks */
- n = snprintf(p->a + p->n, limit, "%lu\n", sum);
- p->n += n;
- limit -= n;
- if (unlikely(limit <= 0))
- goto out_free;
- } else {
-#define str "1\n0\n0\n"
- p->n = sizeof(str) - 1;
- strcpy(p->a, str);
-#undef str
- }
- si_read_unlock(sb);
-
- err = 0;
- file->private_data = p;
- goto out; /* success */
-
-out_free:
- free_page((unsigned long)p);
-out:
- return err;
-}
-
-static ssize_t dbgaufs_plink_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct dbgaufs_plink_arg *p;
-
- p = file->private_data;
- return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
-}
-
-static const struct file_operations dbgaufs_plink_fop = {
- .owner = THIS_MODULE,
- .open = dbgaufs_plink_open,
- .release = dbgaufs_plink_release,
- .read = dbgaufs_plink_read
-};
-
-/* ---------------------------------------------------------------------- */
-
-static int dbgaufs_xib_open(struct inode *inode, struct file *file)
-{
- int err;
- struct au_sbinfo *sbinfo;
- struct super_block *sb;
-
- sbinfo = inode->i_private;
- sb = sbinfo->si_sb;
- si_noflush_read_lock(sb);
- err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0);
- si_read_unlock(sb);
- return err;
-}
-
-static const struct file_operations dbgaufs_xib_fop = {
- .owner = THIS_MODULE,
- .open = dbgaufs_xib_open,
- .release = dbgaufs_xi_release,
- .read = dbgaufs_xi_read
-};
-
-/* ---------------------------------------------------------------------- */
-
-#define DbgaufsXi_PREFIX "xi"
-
-static int dbgaufs_xino_open(struct inode *inode, struct file *file)
-{
- int err;
- long l;
- struct au_sbinfo *sbinfo;
- struct super_block *sb;
- struct file *xf;
- struct qstr *name;
-
- err = -ENOENT;
- xf = NULL;
- name = &file->f_path.dentry->d_name;
- if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX)
- || memcmp(name->name, DbgaufsXi_PREFIX,
- sizeof(DbgaufsXi_PREFIX) - 1)))
- goto out;
- err = kstrtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l);
- if (unlikely(err))
- goto out;
-
- sbinfo = inode->i_private;
- sb = sbinfo->si_sb;
- si_noflush_read_lock(sb);
- if (l <= au_sbend(sb)) {
- xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file;
- err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1);
- } else
- err = -ENOENT;
- si_read_unlock(sb);
-
-out:
- return err;
-}
-
-static const struct file_operations dbgaufs_xino_fop = {
- .owner = THIS_MODULE,
- .open = dbgaufs_xino_open,
- .release = dbgaufs_xi_release,
- .read = dbgaufs_xi_read
-};
-
-void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
-{
- aufs_bindex_t bend;
- struct au_branch *br;
- struct au_xino_file *xi;
-
- if (!au_sbi(sb)->si_dbgaufs)
- return;
-
- bend = au_sbend(sb);
- for (; bindex <= bend; bindex++) {
- br = au_sbr(sb, bindex);
- xi = &br->br_xino;
- debugfs_remove(xi->xi_dbgaufs);
- xi->xi_dbgaufs = NULL;
- }
-}
-
-void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
-{
- struct au_sbinfo *sbinfo;
- struct dentry *parent;
- struct au_branch *br;
- struct au_xino_file *xi;
- aufs_bindex_t bend;
- char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NULL */
-
- sbinfo = au_sbi(sb);
- parent = sbinfo->si_dbgaufs;
- if (!parent)
- return;
-
- bend = au_sbend(sb);
- for (; bindex <= bend; bindex++) {
- snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
- br = au_sbr(sb, bindex);
- xi = &br->br_xino;
- AuDebugOn(xi->xi_dbgaufs);
- xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent,
- sbinfo, &dbgaufs_xino_fop);
- /* ignore an error */
- if (unlikely(!xi->xi_dbgaufs))
- AuWarn1("failed %s under debugfs\n", name);
- }
-}
-
-/* ---------------------------------------------------------------------- */
-
-#ifdef CONFIG_AUFS_EXPORT
-static int dbgaufs_xigen_open(struct inode *inode, struct file *file)
-{
- int err;
- struct au_sbinfo *sbinfo;
- struct super_block *sb;
-
- sbinfo = inode->i_private;
- sb = sbinfo->si_sb;
- si_noflush_read_lock(sb);
- err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0);
- si_read_unlock(sb);
- return err;
-}
-
-static const struct file_operations dbgaufs_xigen_fop = {
- .owner = THIS_MODULE,
- .open = dbgaufs_xigen_open,
- .release = dbgaufs_xi_release,
- .read = dbgaufs_xi_read
-};
-
-static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
-{
- int err;
-
- /*
- * This function is a dynamic '__init' function actually,
- * so the tiny check for si_rwsem is unnecessary.
- */
- /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
-
- err = -EIO;
- sbinfo->si_dbgaufs_xigen = debugfs_create_file
- ("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
- &dbgaufs_xigen_fop);
- if (sbinfo->si_dbgaufs_xigen)
- err = 0;
-
- return err;
-}
-#else
-static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
-{
- return 0;
-}
-#endif /* CONFIG_AUFS_EXPORT */
-
-/* ---------------------------------------------------------------------- */
-
-void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
-{
- /*
- * This function is a dynamic '__fin' function actually,
- * so the tiny check for si_rwsem is unnecessary.
- */
- /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
-
- debugfs_remove_recursive(sbinfo->si_dbgaufs);
- sbinfo->si_dbgaufs = NULL;
- kobject_put(&sbinfo->si_kobj);
-}
-
-int dbgaufs_si_init(struct au_sbinfo *sbinfo)
-{
- int err;
- char name[SysaufsSiNameLen];
-
- /*
- * This function is a dynamic '__init' function actually,
- * so the tiny check for si_rwsem is unnecessary.
- */
- /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
-
- err = -ENOENT;
- if (!dbgaufs) {
- AuErr1("/debug/aufs is uninitialized\n");
- goto out;
- }
-
- err = -EIO;
- sysaufs_name(sbinfo, name);
- sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs);
- if (unlikely(!sbinfo->si_dbgaufs))
- goto out;
- kobject_get(&sbinfo->si_kobj);
-
- sbinfo->si_dbgaufs_xib = debugfs_create_file
- ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
- &dbgaufs_xib_fop);
- if (unlikely(!sbinfo->si_dbgaufs_xib))
- goto out_dir;
-
- sbinfo->si_dbgaufs_plink = debugfs_create_file
- ("plink", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
- &dbgaufs_plink_fop);
- if (unlikely(!sbinfo->si_dbgaufs_plink))
- goto out_dir;
-
- err = dbgaufs_xigen_init(sbinfo);
- if (!err)
- goto out; /* success */
-
-out_dir:
- dbgaufs_si_fin(sbinfo);
-out:
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void dbgaufs_fin(void)
-{
- debugfs_remove(dbgaufs);
-}
-
-int __init dbgaufs_init(void)
-{
- int err;
-
- err = -EIO;
- dbgaufs = debugfs_create_dir(AUFS_NAME, NULL);
- if (dbgaufs)
- err = 0;
- return err;
-}
diff --git a/fs/aufs/dbgaufs.h b/fs/aufs/dbgaufs.h
deleted file mode 100644
index 81f272e42..000000000
--- a/fs/aufs/dbgaufs.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * debugfs interface
- */
-
-#ifndef __DBGAUFS_H__
-#define __DBGAUFS_H__
-
-#ifdef __KERNEL__
-
-struct super_block;
-struct au_sbinfo;
-
-#ifdef CONFIG_DEBUG_FS
-/* dbgaufs.c */
-void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
-void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
-void dbgaufs_si_fin(struct au_sbinfo *sbinfo);
-int dbgaufs_si_init(struct au_sbinfo *sbinfo);
-void dbgaufs_fin(void);
-int __init dbgaufs_init(void);
-#else
-AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
-AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
-AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo)
-AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo)
-AuStubVoid(dbgaufs_fin, void)
-AuStubInt0(__init dbgaufs_init, void)
-#endif /* CONFIG_DEBUG_FS */
-
-#endif /* __KERNEL__ */
-#endif /* __DBGAUFS_H__ */
diff --git a/fs/aufs/dcsub.c b/fs/aufs/dcsub.c
deleted file mode 100644
index e72accebb..000000000
--- a/fs/aufs/dcsub.c
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * sub-routines for dentry cache
- */
-
-#include "aufs.h"
-
-static void au_dpage_free(struct au_dpage *dpage)
-{
- int i;
- struct dentry **p;
-
- p = dpage->dentries;
- for (i = 0; i < dpage->ndentry; i++)
- dput(*p++);
- free_page((unsigned long)dpage->dentries);
-}
-
-int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp)
-{
- int err;
- void *p;
-
- err = -ENOMEM;
- dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp);
- if (unlikely(!dpages->dpages))
- goto out;
-
- p = (void *)__get_free_page(gfp);
- if (unlikely(!p))
- goto out_dpages;
-
- dpages->dpages[0].ndentry = 0;
- dpages->dpages[0].dentries = p;
- dpages->ndpage = 1;
- return 0; /* success */
-
-out_dpages:
- kfree(dpages->dpages);
-out:
- return err;
-}
-
-void au_dpages_free(struct au_dcsub_pages *dpages)
-{
- int i;
- struct au_dpage *p;
-
- p = dpages->dpages;
- for (i = 0; i < dpages->ndpage; i++)
- au_dpage_free(p++);
- kfree(dpages->dpages);
-}
-
-static int au_dpages_append(struct au_dcsub_pages *dpages,
- struct dentry *dentry, gfp_t gfp)
-{
- int err, sz;
- struct au_dpage *dpage;
- void *p;
-
- dpage = dpages->dpages + dpages->ndpage - 1;
- sz = PAGE_SIZE / sizeof(dentry);
- if (unlikely(dpage->ndentry >= sz)) {
- AuLabel(new dpage);
- err = -ENOMEM;
- sz = dpages->ndpage * sizeof(*dpages->dpages);
- p = au_kzrealloc(dpages->dpages, sz,
- sz + sizeof(*dpages->dpages), gfp);
- if (unlikely(!p))
- goto out;
-
- dpages->dpages = p;
- dpage = dpages->dpages + dpages->ndpage;
- p = (void *)__get_free_page(gfp);
- if (unlikely(!p))
- goto out;
-
- dpage->ndentry = 0;
- dpage->dentries = p;
- dpages->ndpage++;
- }
-
- AuDebugOn(au_dcount(dentry) <= 0);
- dpage->dentries[dpage->ndentry++] = dget_dlock(dentry);
- return 0; /* success */
-
-out:
- return err;
-}
-
-/* todo: BAD approach */
-/* copied from linux/fs/dcache.c */
-enum d_walk_ret {
- D_WALK_CONTINUE,
- D_WALK_QUIT,
- D_WALK_NORETRY,
- D_WALK_SKIP,
-};
-
-extern void d_walk(struct dentry *parent, void *data,
- enum d_walk_ret (*enter)(void *, struct dentry *),
- void (*finish)(void *));
-
-struct ac_dpages_arg {
- int err;
- struct au_dcsub_pages *dpages;
- struct super_block *sb;
- au_dpages_test test;
- void *arg;
-};
-
-static enum d_walk_ret au_call_dpages_append(void *_arg, struct dentry *dentry)
-{
- enum d_walk_ret ret;
- struct ac_dpages_arg *arg = _arg;
-
- ret = D_WALK_CONTINUE;
- if (dentry->d_sb == arg->sb
- && !IS_ROOT(dentry)
- && au_dcount(dentry) > 0
- && au_di(dentry)
- && (!arg->test || arg->test(dentry, arg->arg))) {
- arg->err = au_dpages_append(arg->dpages, dentry, GFP_ATOMIC);
- if (unlikely(arg->err))
- ret = D_WALK_QUIT;
- }
-
- return ret;
-}
-
-int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
- au_dpages_test test, void *arg)
-{
- struct ac_dpages_arg args = {
- .err = 0,
- .dpages = dpages,
- .sb = root->d_sb,
- .test = test,
- .arg = arg
- };
-
- d_walk(root, &args, au_call_dpages_append, NULL);
-
- return args.err;
-}
-
-int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
- int do_include, au_dpages_test test, void *arg)
-{
- int err;
-
- err = 0;
- write_seqlock(&rename_lock);
- spin_lock(&dentry->d_lock);
- if (do_include
- && au_dcount(dentry) > 0
- && (!test || test(dentry, arg)))
- err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
- spin_unlock(&dentry->d_lock);
- if (unlikely(err))
- goto out;
-
- /*
- * RCU for vfsmount is unnecessary since this is a traverse in a single
- * mount
- */
- while (!IS_ROOT(dentry)) {
- dentry = dentry->d_parent; /* rename_lock is locked */
- spin_lock(&dentry->d_lock);
- if (au_dcount(dentry) > 0
- && (!test || test(dentry, arg)))
- err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
- spin_unlock(&dentry->d_lock);
- if (unlikely(err))
- break;
- }
-
-out:
- write_sequnlock(&rename_lock);
- return err;
-}
-
-static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg)
-{
- return au_di(dentry) && dentry->d_sb == arg;
-}
-
-int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
- struct dentry *dentry, int do_include)
-{
- return au_dcsub_pages_rev(dpages, dentry, do_include,
- au_dcsub_dpages_aufs, dentry->d_sb);
-}
-
-int au_test_subdir(struct dentry *d1, struct dentry *d2)
-{
- struct path path[2] = {
- {
- .dentry = d1
- },
- {
- .dentry = d2
- }
- };
-
- return path_is_under(path + 0, path + 1);
-}
diff --git a/fs/aufs/dcsub.h b/fs/aufs/dcsub.h
deleted file mode 100644
index 5d2cf661d..000000000
--- a/fs/aufs/dcsub.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * sub-routines for dentry cache
- */
-
-#ifndef __AUFS_DCSUB_H__
-#define __AUFS_DCSUB_H__
-
-#ifdef __KERNEL__
-
-#include <linux/dcache.h>
-#include <linux/fs.h>
-
-struct au_dpage {
- int ndentry;
- struct dentry **dentries;
-};
-
-struct au_dcsub_pages {
- int ndpage;
- struct au_dpage *dpages;
-};
-
-/* ---------------------------------------------------------------------- */
-
-/* dcsub.c */
-int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp);
-void au_dpages_free(struct au_dcsub_pages *dpages);
-typedef int (*au_dpages_test)(struct dentry *dentry, void *arg);
-int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
- au_dpages_test test, void *arg);
-int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
- int do_include, au_dpages_test test, void *arg);
-int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
- struct dentry *dentry, int do_include);
-int au_test_subdir(struct dentry *d1, struct dentry *d2);
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * todo: in linux-3.13, several similar (but faster) helpers are added to
- * include/linux/dcache.h. Try them (in the future).
- */
-
-static inline int au_d_hashed_positive(struct dentry *d)
-{
- int err;
- struct inode *inode = d_inode(d);
-
- err = 0;
- if (unlikely(d_unhashed(d)
- || d_is_negative(d)
- || !inode->i_nlink))
- err = -ENOENT;
- return err;
-}
-
-static inline int au_d_linkable(struct dentry *d)
-{
- int err;
- struct inode *inode = d_inode(d);
-
- err = au_d_hashed_positive(d);
- if (err
- && d_is_positive(d)
- && (inode->i_state & I_LINKABLE))
- err = 0;
- return err;
-}
-
-static inline int au_d_alive(struct dentry *d)
-{
- int err;
- struct inode *inode;
-
- err = 0;
- if (!IS_ROOT(d))
- err = au_d_hashed_positive(d);
- else {
- inode = d_inode(d);
- if (unlikely(d_unlinked(d)
- || d_is_negative(d)
- || !inode->i_nlink))
- err = -ENOENT;
- }
- return err;
-}
-
-static inline int au_alive_dir(struct dentry *d)
-{
- int err;
-
- err = au_d_alive(d);
- if (unlikely(err || IS_DEADDIR(d_inode(d))))
- err = -ENOENT;
- return err;
-}
-
-static inline int au_qstreq(struct qstr *a, struct qstr *b)
-{
- return a->len == b->len
- && !memcmp(a->name, b->name, a->len);
-}
-
-/*
- * by the commit
- * 360f547 2015-01-25 dcache: let the dentry count go down to zero without
- * taking d_lock
- * the type of d_lockref.count became int, but the inlined function d_count()
- * still returns unsigned int.
- * I don't know why. Maybe it is for every d_count() users?
- * Anyway au_dcount() lives on.
- */
-static inline int au_dcount(struct dentry *d)
-{
- return (int)d_count(d);
-}
-
-#endif /* __KERNEL__ */
-#endif /* __AUFS_DCSUB_H__ */
diff --git a/fs/aufs/debug.c b/fs/aufs/debug.c
deleted file mode 100644
index 4529831a9..000000000
--- a/fs/aufs/debug.c
+++ /dev/null
@@ -1,425 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * debug print functions
- */
-
-#include "aufs.h"
-
-/* Returns 0, or -errno. arg is in kp->arg. */
-static int param_atomic_t_set(const char *val, const struct kernel_param *kp)
-{
- int err, n;
-
- err = kstrtoint(val, 0, &n);
- if (!err) {
- if (n > 0)
- au_debug_on();
- else
- au_debug_off();
- }
- return err;
-}
-
-/* Returns length written or -errno. Buffer is 4k (ie. be short!) */
-static int param_atomic_t_get(char *buffer, const struct kernel_param *kp)
-{
- atomic_t *a;
-
- a = kp->arg;
- return sprintf(buffer, "%d", atomic_read(a));
-}
-
-static struct kernel_param_ops param_ops_atomic_t = {
- .set = param_atomic_t_set,
- .get = param_atomic_t_get
- /* void (*free)(void *arg) */
-};
-
-atomic_t aufs_debug = ATOMIC_INIT(0);
-MODULE_PARM_DESC(debug, "debug print");
-module_param_named(debug, aufs_debug, atomic_t, S_IRUGO | S_IWUSR | S_IWGRP);
-
-DEFINE_MUTEX(au_dbg_mtx); /* just to serialize the dbg msgs */
-char *au_plevel = KERN_DEBUG;
-#define dpri(fmt, ...) do { \
- if ((au_plevel \
- && strcmp(au_plevel, KERN_DEBUG)) \
- || au_debug_test()) \
- printk("%s" fmt, au_plevel, ##__VA_ARGS__); \
-} while (0)
-
-/* ---------------------------------------------------------------------- */
-
-void au_dpri_whlist(struct au_nhash *whlist)
-{
- unsigned long ul, n;
- struct hlist_head *head;
- struct au_vdir_wh *pos;
-
- n = whlist->nh_num;
- head = whlist->nh_head;
- for (ul = 0; ul < n; ul++) {
- hlist_for_each_entry(pos, head, wh_hash)
- dpri("b%d, %.*s, %d\n",
- pos->wh_bindex,
- pos->wh_str.len, pos->wh_str.name,
- pos->wh_str.len);
- head++;
- }
-}
-
-void au_dpri_vdir(struct au_vdir *vdir)
-{
- unsigned long ul;
- union au_vdir_deblk_p p;
- unsigned char *o;
-
- if (!vdir || IS_ERR(vdir)) {
- dpri("err %ld\n", PTR_ERR(vdir));
- return;
- }
-
- dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n",
- vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk,
- vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version);
- for (ul = 0; ul < vdir->vd_nblk; ul++) {
- p.deblk = vdir->vd_deblk[ul];
- o = p.deblk;
- dpri("[%lu]: %p\n", ul, o);
- }
-}
-
-static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn,
- struct dentry *wh)
-{
- char *n = NULL;
- int l = 0;
-
- if (!inode || IS_ERR(inode)) {
- dpri("i%d: err %ld\n", bindex, PTR_ERR(inode));
- return -1;
- }
-
- /* the type of i_blocks depends upon CONFIG_LBDAF */
- BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long)
- && sizeof(inode->i_blocks) != sizeof(u64));
- if (wh) {
- n = (void *)wh->d_name.name;
- l = wh->d_name.len;
- }
-
- dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu,"
- " hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n",
- bindex, inode,
- inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??",
- atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode,
- i_size_read(inode), (unsigned long long)inode->i_blocks,
- hn, (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff,
- inode->i_mapping ? inode->i_mapping->nrpages : 0,
- inode->i_state, inode->i_flags, inode->i_version,
- inode->i_generation,
- l ? ", wh " : "", l, n);
- return 0;
-}
-
-void au_dpri_inode(struct inode *inode)
-{
- struct au_iinfo *iinfo;
- aufs_bindex_t bindex;
- int err, hn;
-
- err = do_pri_inode(-1, inode, -1, NULL);
- if (err || !au_test_aufs(inode->i_sb))
- return;
-
- iinfo = au_ii(inode);
- if (!iinfo)
- return;
- dpri("i-1: bstart %d, bend %d, gen %d\n",
- iinfo->ii_bstart, iinfo->ii_bend, au_iigen(inode, NULL));
- if (iinfo->ii_bstart < 0)
- return;
- hn = 0;
- for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; bindex++) {
- hn = !!au_hn(iinfo->ii_hinode + bindex);
- do_pri_inode(bindex, iinfo->ii_hinode[0 + bindex].hi_inode, hn,
- iinfo->ii_hinode[0 + bindex].hi_whdentry);
- }
-}
-
-void au_dpri_dalias(struct inode *inode)
-{
- struct dentry *d;
-
- spin_lock(&inode->i_lock);
- hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias)
- au_dpri_dentry(d);
- spin_unlock(&inode->i_lock);
-}
-
-static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
-{
- struct dentry *wh = NULL;
- int hn;
- struct au_iinfo *iinfo;
-
- if (!dentry || IS_ERR(dentry)) {
- dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
- return -1;
- }
- /* do not call dget_parent() here */
- /* note: access d_xxx without d_lock */
- dpri("d%d: %p, %pd2?, %s, cnt %d, flags 0x%x, %shashed\n",
- bindex, dentry, dentry,
- dentry->d_sb ? au_sbtype(dentry->d_sb) : "??",
- au_dcount(dentry), dentry->d_flags,
- d_unhashed(dentry) ? "un" : "");
- hn = -1;
- if (bindex >= 0
- && d_is_positive(dentry)
- && au_test_aufs(dentry->d_sb)) {
- iinfo = au_ii(d_inode(dentry));
- if (iinfo) {
- hn = !!au_hn(iinfo->ii_hinode + bindex);
- wh = iinfo->ii_hinode[0 + bindex].hi_whdentry;
- }
- }
- do_pri_inode(bindex, d_inode(dentry), hn, wh);
- return 0;
-}
-
-void au_dpri_dentry(struct dentry *dentry)
-{
- struct au_dinfo *dinfo;
- aufs_bindex_t bindex;
- int err;
- struct au_hdentry *hdp;
-
- err = do_pri_dentry(-1, dentry);
- if (err || !au_test_aufs(dentry->d_sb))
- return;
-
- dinfo = au_di(dentry);
- if (!dinfo)
- return;
- dpri("d-1: bstart %d, bend %d, bwh %d, bdiropq %d, gen %d, tmp %d\n",
- dinfo->di_bstart, dinfo->di_bend,
- dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry),
- dinfo->di_tmpfile);
- if (dinfo->di_bstart < 0)
- return;
- hdp = dinfo->di_hdentry;
- for (bindex = dinfo->di_bstart; bindex <= dinfo->di_bend; bindex++)
- do_pri_dentry(bindex, hdp[0 + bindex].hd_dentry);
-}
-
-static int do_pri_file(aufs_bindex_t bindex, struct file *file)
-{
- char a[32];
-
- if (!file || IS_ERR(file)) {
- dpri("f%d: err %ld\n", bindex, PTR_ERR(file));
- return -1;
- }
- a[0] = 0;
- if (bindex < 0
- && !IS_ERR_OR_NULL(file->f_path.dentry)
- && au_test_aufs(file->f_path.dentry->d_sb)
- && au_fi(file))
- snprintf(a, sizeof(a), ", gen %d, mmapped %d",
- au_figen(file), atomic_read(&au_fi(file)->fi_mmapped));
- dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n",
- bindex, file->f_mode, file->f_flags, (long)file_count(file),
- file->f_version, file->f_pos, a);
- if (!IS_ERR_OR_NULL(file->f_path.dentry))
- do_pri_dentry(bindex, file->f_path.dentry);
- return 0;
-}
-
-void au_dpri_file(struct file *file)
-{
- struct au_finfo *finfo;
- struct au_fidir *fidir;
- struct au_hfile *hfile;
- aufs_bindex_t bindex;
- int err;
-
- err = do_pri_file(-1, file);
- if (err
- || IS_ERR_OR_NULL(file->f_path.dentry)
- || !au_test_aufs(file->f_path.dentry->d_sb))
- return;
-
- finfo = au_fi(file);
- if (!finfo)
- return;
- if (finfo->fi_btop < 0)
- return;
- fidir = finfo->fi_hdir;
- if (!fidir)
- do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file);
- else
- for (bindex = finfo->fi_btop;
- bindex >= 0 && bindex <= fidir->fd_bbot;
- bindex++) {
- hfile = fidir->fd_hfile + bindex;
- do_pri_file(bindex, hfile ? hfile->hf_file : NULL);
- }
-}
-
-static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br)
-{
- struct vfsmount *mnt;
- struct super_block *sb;
-
- if (!br || IS_ERR(br))
- goto out;
- mnt = au_br_mnt(br);
- if (!mnt || IS_ERR(mnt))
- goto out;
- sb = mnt->mnt_sb;
- if (!sb || IS_ERR(sb))
- goto out;
-
- dpri("s%d: {perm 0x%x, id %d, cnt %d, wbr %p}, "
- "%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, "
- "xino %d\n",
- bindex, br->br_perm, br->br_id, atomic_read(&br->br_count),
- br->br_wbr, au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
- sb->s_flags, sb->s_count,
- atomic_read(&sb->s_active), !!br->br_xino.xi_file);
- return 0;
-
-out:
- dpri("s%d: err %ld\n", bindex, PTR_ERR(br));
- return -1;
-}
-
-void au_dpri_sb(struct super_block *sb)
-{
- struct au_sbinfo *sbinfo;
- aufs_bindex_t bindex;
- int err;
- /* to reuduce stack size */
- struct {
- struct vfsmount mnt;
- struct au_branch fake;
- } *a;
-
- /* this function can be called from magic sysrq */
- a = kzalloc(sizeof(*a), GFP_ATOMIC);
- if (unlikely(!a)) {
- dpri("no memory\n");
- return;
- }
-
- a->mnt.mnt_sb = sb;
- a->fake.br_path.mnt = &a->mnt;
- atomic_set(&a->fake.br_count, 0);
- smp_mb(); /* atomic_set */
- err = do_pri_br(-1, &a->fake);
- kfree(a);
- dpri("dev 0x%x\n", sb->s_dev);
- if (err || !au_test_aufs(sb))
- return;
-
- sbinfo = au_sbi(sb);
- if (!sbinfo)
- return;
- dpri("nw %d, gen %u, kobj %d\n",
- atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
- atomic_read(&sbinfo->si_kobj.kref.refcount));
- for (bindex = 0; bindex <= sbinfo->si_bend; bindex++)
- do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
-}
-
-/* ---------------------------------------------------------------------- */
-
-void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line)
-{
- struct inode *h_inode, *inode = d_inode(dentry);
- struct dentry *h_dentry;
- aufs_bindex_t bindex, bend, bi;
-
- if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */)
- return;
-
- bend = au_dbend(dentry);
- bi = au_ibend(inode);
- if (bi < bend)
- bend = bi;
- bindex = au_dbstart(dentry);
- bi = au_ibstart(inode);
- if (bi > bindex)
- bindex = bi;
-
- for (; bindex <= bend; bindex++) {
- h_dentry = au_h_dptr(dentry, bindex);
- if (!h_dentry)
- continue;
- h_inode = au_h_iptr(inode, bindex);
- if (unlikely(h_inode != d_inode(h_dentry))) {
- au_debug_on();
- AuDbg("b%d, %s:%d\n", bindex, func, line);
- AuDbgDentry(dentry);
- AuDbgInode(inode);
- au_debug_off();
- BUG();
- }
- }
-}
-
-void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
-{
- int err, i, j;
- struct au_dcsub_pages dpages;
- struct au_dpage *dpage;
- struct dentry **dentries;
-
- err = au_dpages_init(&dpages, GFP_NOFS);
- AuDebugOn(err);
- err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1);
- AuDebugOn(err);
- for (i = dpages.ndpage - 1; !err && i >= 0; i--) {
- dpage = dpages.dpages + i;
- dentries = dpage->dentries;
- for (j = dpage->ndentry - 1; !err && j >= 0; j--)
- AuDebugOn(au_digen_test(dentries[j], sigen));
- }
- au_dpages_free(&dpages);
-}
-
-void au_dbg_verify_kthread(void)
-{
- if (au_wkq_test()) {
- au_dbg_blocked();
- /*
- * It may be recursive, but udba=notify between two aufs mounts,
- * where a single ro branch is shared, is not a problem.
- */
- /* WARN_ON(1); */
- }
-}
-
-/* ---------------------------------------------------------------------- */
-
-int __init au_debug_init(void)
-{
- aufs_bindex_t bindex;
- struct au_vdir_destr destr;
-
- bindex = -1;
- AuDebugOn(bindex >= 0);
-
- destr.len = -1;
- AuDebugOn(destr.len < NAME_MAX);
-
-#ifdef CONFIG_4KSTACKS
- pr_warn("CONFIG_4KSTACKS is defined.\n");
-#endif
-
- return 0;
-}
diff --git a/fs/aufs/debug.h b/fs/aufs/debug.h
deleted file mode 100644
index 0567f31d0..000000000
--- a/fs/aufs/debug.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * debug print functions
- */
-
-#ifndef __AUFS_DEBUG_H__
-#define __AUFS_DEBUG_H__
-
-#ifdef __KERNEL__
-
-#include <linux/atomic.h>
-#include <linux/module.h>
-#include <linux/kallsyms.h>
-#include <linux/sysrq.h>
-
-#ifdef CONFIG_AUFS_DEBUG
-#define AuDebugOn(a) BUG_ON(a)
-
-/* module parameter */
-extern atomic_t aufs_debug;
-static inline void au_debug_on(void)
-{
- atomic_inc(&aufs_debug);
-}
-static inline void au_debug_off(void)
-{
- atomic_dec_if_positive(&aufs_debug);
-}
-
-static inline int au_debug_test(void)
-{
- return atomic_read(&aufs_debug) > 0;
-}
-#else
-#define AuDebugOn(a) do {} while (0)
-AuStubVoid(au_debug_on, void)
-AuStubVoid(au_debug_off, void)
-AuStubInt0(au_debug_test, void)
-#endif /* CONFIG_AUFS_DEBUG */
-
-#define param_check_atomic_t(name, p) __param_check(name, p, atomic_t)
-
-/* ---------------------------------------------------------------------- */
-
-/* debug print */
-
-#define AuDbg(fmt, ...) do { \
- if (au_debug_test()) \
- pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \
-} while (0)
-#define AuLabel(l) AuDbg(#l "\n")
-#define AuIOErr(fmt, ...) pr_err("I/O Error, " fmt, ##__VA_ARGS__)
-#define AuWarn1(fmt, ...) do { \
- static unsigned char _c; \
- if (!_c++) \
- pr_warn(fmt, ##__VA_ARGS__); \
-} while (0)
-
-#define AuErr1(fmt, ...) do { \
- static unsigned char _c; \
- if (!_c++) \
- pr_err(fmt, ##__VA_ARGS__); \
-} while (0)
-
-#define AuIOErr1(fmt, ...) do { \
- static unsigned char _c; \
- if (!_c++) \
- AuIOErr(fmt, ##__VA_ARGS__); \
-} while (0)
-
-#define AuUnsupportMsg "This operation is not supported." \
- " Please report this application to aufs-users ML."
-#define AuUnsupport(fmt, ...) do { \
- pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \
- dump_stack(); \
-} while (0)
-
-#define AuTraceErr(e) do { \
- if (unlikely((e) < 0)) \
- AuDbg("err %d\n", (int)(e)); \
-} while (0)
-
-#define AuTraceErrPtr(p) do { \
- if (IS_ERR(p)) \
- AuDbg("err %ld\n", PTR_ERR(p)); \
-} while (0)
-
-/* dirty macros for debug print, use with "%.*s" and caution */
-#define AuLNPair(qstr) (qstr)->len, (qstr)->name
-
-/* ---------------------------------------------------------------------- */
-
-struct dentry;
-#ifdef CONFIG_AUFS_DEBUG
-extern struct mutex au_dbg_mtx;
-extern char *au_plevel;
-struct au_nhash;
-void au_dpri_whlist(struct au_nhash *whlist);
-struct au_vdir;
-void au_dpri_vdir(struct au_vdir *vdir);
-struct inode;
-void au_dpri_inode(struct inode *inode);
-void au_dpri_dalias(struct inode *inode);
-void au_dpri_dentry(struct dentry *dentry);
-struct file;
-void au_dpri_file(struct file *filp);
-struct super_block;
-void au_dpri_sb(struct super_block *sb);
-
-#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__)
-void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line);
-void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen);
-void au_dbg_verify_kthread(void);
-
-int __init au_debug_init(void);
-
-#define AuDbgWhlist(w) do { \
- mutex_lock(&au_dbg_mtx); \
- AuDbg(#w "\n"); \
- au_dpri_whlist(w); \
- mutex_unlock(&au_dbg_mtx); \
-} while (0)
-
-#define AuDbgVdir(v) do { \
- mutex_lock(&au_dbg_mtx); \
- AuDbg(#v "\n"); \
- au_dpri_vdir(v); \
- mutex_unlock(&au_dbg_mtx); \
-} while (0)
-
-#define AuDbgInode(i) do { \
- mutex_lock(&au_dbg_mtx); \
- AuDbg(#i "\n"); \
- au_dpri_inode(i); \
- mutex_unlock(&au_dbg_mtx); \
-} while (0)
-
-#define AuDbgDAlias(i) do { \
- mutex_lock(&au_dbg_mtx); \
- AuDbg(#i "\n"); \
- au_dpri_dalias(i); \
- mutex_unlock(&au_dbg_mtx); \
-} while (0)
-
-#define AuDbgDentry(d) do { \
- mutex_lock(&au_dbg_mtx); \
- AuDbg(#d "\n"); \
- au_dpri_dentry(d); \
- mutex_unlock(&au_dbg_mtx); \
-} while (0)
-
-#define AuDbgFile(f) do { \
- mutex_lock(&au_dbg_mtx); \
- AuDbg(#f "\n"); \
- au_dpri_file(f); \
- mutex_unlock(&au_dbg_mtx); \
-} while (0)
-
-#define AuDbgSb(sb) do { \
- mutex_lock(&au_dbg_mtx); \
- AuDbg(#sb "\n"); \
- au_dpri_sb(sb); \
- mutex_unlock(&au_dbg_mtx); \
-} while (0)
-
-#define AuDbgSym(addr) do { \
- char sym[KSYM_SYMBOL_LEN]; \
- sprint_symbol(sym, (unsigned long)addr); \
- AuDbg("%s\n", sym); \
-} while (0)
-#else
-AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry)
-AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen)
-AuStubVoid(au_dbg_verify_kthread, void)
-AuStubInt0(__init au_debug_init, void)
-
-#define AuDbgWhlist(w) do {} while (0)
-#define AuDbgVdir(v) do {} while (0)
-#define AuDbgInode(i) do {} while (0)
-#define AuDbgDAlias(i) do {} while (0)
-#define AuDbgDentry(d) do {} while (0)
-#define AuDbgFile(f) do {} while (0)
-#define AuDbgSb(sb) do {} while (0)
-#define AuDbgSym(addr) do {} while (0)
-#endif /* CONFIG_AUFS_DEBUG */
-
-/* ---------------------------------------------------------------------- */
-
-#ifdef CONFIG_AUFS_MAGIC_SYSRQ
-int __init au_sysrq_init(void);
-void au_sysrq_fin(void);
-
-#ifdef CONFIG_HW_CONSOLE
-#define au_dbg_blocked() do { \
- WARN_ON(1); \
- handle_sysrq('w'); \
-} while (0)
-#else
-AuStubVoid(au_dbg_blocked, void)
-#endif
-
-#else
-AuStubInt0(__init au_sysrq_init, void)
-AuStubVoid(au_sysrq_fin, void)
-AuStubVoid(au_dbg_blocked, void)
-#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
-
-#endif /* __KERNEL__ */
-#endif /* __AUFS_DEBUG_H__ */
diff --git a/fs/aufs/dentry.c b/fs/aufs/dentry.c
deleted file mode 100644
index e47a7e6c4..000000000
--- a/fs/aufs/dentry.c
+++ /dev/null
@@ -1,1123 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * lookup and dentry operations
- */
-
-#include <linux/namei.h>
-#include "aufs.h"
-
-#define AuLkup_ALLOW_NEG 1
-#define AuLkup_IGNORE_PERM (1 << 1)
-#define au_ftest_lkup(flags, name) ((flags) & AuLkup_##name)
-#define au_fset_lkup(flags, name) \
- do { (flags) |= AuLkup_##name; } while (0)
-#define au_fclr_lkup(flags, name) \
- do { (flags) &= ~AuLkup_##name; } while (0)
-
-struct au_do_lookup_args {
- unsigned int flags;
- mode_t type;
-};
-
-/*
- * returns positive/negative dentry, NULL or an error.
- * NULL means whiteout-ed or not-found.
- */
-static struct dentry*
-au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
- aufs_bindex_t bindex, struct qstr *wh_name,
- struct au_do_lookup_args *args)
-{
- struct dentry *h_dentry;
- struct inode *h_inode;
- struct au_branch *br;
- int wh_found, opq;
- unsigned char wh_able;
- const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);
- const unsigned char ignore_perm = !!au_ftest_lkup(args->flags,
- IGNORE_PERM);
-
- wh_found = 0;
- br = au_sbr(dentry->d_sb, bindex);
- wh_able = !!au_br_whable(br->br_perm);
- if (wh_able)
- wh_found = au_wh_test(h_parent, wh_name, /*try_sio*/0);
- h_dentry = ERR_PTR(wh_found);
- if (!wh_found)
- goto real_lookup;
- if (unlikely(wh_found < 0))
- goto out;
-
- /* We found a whiteout */
- /* au_set_dbend(dentry, bindex); */
- au_set_dbwh(dentry, bindex);
- if (!allow_neg)
- return NULL; /* success */
-
-real_lookup:
- if (!ignore_perm)
- h_dentry = vfsub_lkup_one(&dentry->d_name, h_parent);
- else
- h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent);
- if (IS_ERR(h_dentry)) {
- if (PTR_ERR(h_dentry) == -ENAMETOOLONG
- && !allow_neg)
- h_dentry = NULL;
- goto out;
- }
-
- h_inode = d_inode(h_dentry);
- if (d_is_negative(h_dentry)) {
- if (!allow_neg)
- goto out_neg;
- } else if (wh_found
- || (args->type && args->type != (h_inode->i_mode & S_IFMT)))
- goto out_neg;
-
- if (au_dbend(dentry) <= bindex)
- au_set_dbend(dentry, bindex);
- if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
- au_set_dbstart(dentry, bindex);
- au_set_h_dptr(dentry, bindex, h_dentry);
-
- if (!d_is_dir(h_dentry)
- || !wh_able
- || (d_really_is_positive(dentry) && !d_is_dir(dentry)))
- goto out; /* success */
-
- mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
- opq = au_diropq_test(h_dentry);
- mutex_unlock(&h_inode->i_mutex);
- if (opq > 0)
- au_set_dbdiropq(dentry, bindex);
- else if (unlikely(opq < 0)) {
- au_set_h_dptr(dentry, bindex, NULL);
- h_dentry = ERR_PTR(opq);
- }
- goto out;
-
-out_neg:
- dput(h_dentry);
- h_dentry = NULL;
-out:
- return h_dentry;
-}
-
-static int au_test_shwh(struct super_block *sb, const struct qstr *name)
-{
- if (unlikely(!au_opt_test(au_mntflags(sb), SHWH)
- && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)))
- return -EPERM;
- return 0;
-}
-
-/*
- * returns the number of lower positive dentries,
- * otherwise an error.
- * can be called at unlinking with @type is zero.
- */
-int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type)
-{
- int npositive, err;
- aufs_bindex_t bindex, btail, bdiropq;
- unsigned char isdir, dirperm1;
- struct qstr whname;
- struct au_do_lookup_args args = {
- .flags = 0,
- .type = type
- };
- const struct qstr *name = &dentry->d_name;
- struct dentry *parent;
- struct super_block *sb;
-
- sb = dentry->d_sb;
- err = au_test_shwh(sb, name);
- if (unlikely(err))
- goto out;
-
- err = au_wh_name_alloc(&whname, name);
- if (unlikely(err))
- goto out;
-
- isdir = !!d_is_dir(dentry);
- if (!type)
- au_fset_lkup(args.flags, ALLOW_NEG);
- dirperm1 = !!au_opt_test(au_mntflags(sb), DIRPERM1);
-
- npositive = 0;
- parent = dget_parent(dentry);
- btail = au_dbtaildir(parent);
- for (bindex = bstart; bindex <= btail; bindex++) {
- struct dentry *h_parent, *h_dentry;
- struct inode *h_inode, *h_dir;
-
- h_dentry = au_h_dptr(dentry, bindex);
- if (h_dentry) {
- if (d_is_positive(h_dentry))
- npositive++;
- if (type != S_IFDIR)
- break;
- continue;
- }
- h_parent = au_h_dptr(parent, bindex);
- if (!h_parent || !d_is_dir(h_parent))
- continue;
-
- h_dir = d_inode(h_parent);
- mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
- h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname,
- &args);
- mutex_unlock(&h_dir->i_mutex);
- err = PTR_ERR(h_dentry);
- if (IS_ERR(h_dentry))
- goto out_parent;
- if (h_dentry)
- au_fclr_lkup(args.flags, ALLOW_NEG);
- if (dirperm1)
- au_fset_lkup(args.flags, IGNORE_PERM);
-
- if (au_dbwh(dentry) == bindex)
- break;
- if (!h_dentry)
- continue;
- if (d_is_negative(h_dentry))
- continue;
- h_inode = d_inode(h_dentry);
- npositive++;
- if (!args.type)
- args.type = h_inode->i_mode & S_IFMT;
- if (args.type != S_IFDIR)
- break;
- else if (isdir) {
- /* the type of lower may be different */
- bdiropq = au_dbdiropq(dentry);
- if (bdiropq >= 0 && bdiropq <= bindex)
- break;
- }
- }
-
- if (npositive) {
- AuLabel(positive);
- au_update_dbstart(dentry);
- }
- err = npositive;
- if (unlikely(!au_opt_test(au_mntflags(sb), UDBA_NONE)
- && au_dbstart(dentry) < 0)) {
- err = -EIO;
- AuIOErr("both of real entry and whiteout found, %pd, err %d\n",
- dentry, err);
- }
-
-out_parent:
- dput(parent);
- kfree(whname.name);
-out:
- return err;
-}
-
-struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent)
-{
- struct dentry *dentry;
- int wkq_err;
-
- if (!au_test_h_perm_sio(d_inode(parent), MAY_EXEC))
- dentry = vfsub_lkup_one(name, parent);
- else {
- struct vfsub_lkup_one_args args = {
- .errp = &dentry,
- .name = name,
- .parent = parent
- };
-
- wkq_err = au_wkq_wait(vfsub_call_lkup_one, &args);
- if (unlikely(wkq_err))
- dentry = ERR_PTR(wkq_err);
- }
-
- return dentry;
-}
-
-/*
- * lookup @dentry on @bindex which should be negative.
- */
-int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh)
-{
- int err;
- struct dentry *parent, *h_parent, *h_dentry;
- struct au_branch *br;
-
- parent = dget_parent(dentry);
- h_parent = au_h_dptr(parent, bindex);
- br = au_sbr(dentry->d_sb, bindex);
- if (wh)
- h_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
- else
- h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent);
- err = PTR_ERR(h_dentry);
- if (IS_ERR(h_dentry))
- goto out;
- if (unlikely(d_is_positive(h_dentry))) {
- err = -EIO;
- AuIOErr("%pd should be negative on b%d.\n", h_dentry, bindex);
- dput(h_dentry);
- goto out;
- }
-
- err = 0;
- if (bindex < au_dbstart(dentry))
- au_set_dbstart(dentry, bindex);
- if (au_dbend(dentry) < bindex)
- au_set_dbend(dentry, bindex);
- au_set_h_dptr(dentry, bindex, h_dentry);
-
-out:
- dput(parent);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* subset of struct inode */
-struct au_iattr {
- unsigned long i_ino;
- /* unsigned int i_nlink; */
- kuid_t i_uid;
- kgid_t i_gid;
- u64 i_version;
-/*
- loff_t i_size;
- blkcnt_t i_blocks;
-*/
- umode_t i_mode;
-};
-
-static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
-{
- ia->i_ino = h_inode->i_ino;
- /* ia->i_nlink = h_inode->i_nlink; */
- ia->i_uid = h_inode->i_uid;
- ia->i_gid = h_inode->i_gid;
- ia->i_version = h_inode->i_version;
-/*
- ia->i_size = h_inode->i_size;
- ia->i_blocks = h_inode->i_blocks;
-*/
- ia->i_mode = (h_inode->i_mode & S_IFMT);
-}
-
-static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
-{
- return ia->i_ino != h_inode->i_ino
- /* || ia->i_nlink != h_inode->i_nlink */
- || !uid_eq(ia->i_uid, h_inode->i_uid)
- || !gid_eq(ia->i_gid, h_inode->i_gid)
- || ia->i_version != h_inode->i_version
-/*
- || ia->i_size != h_inode->i_size
- || ia->i_blocks != h_inode->i_blocks
-*/
- || ia->i_mode != (h_inode->i_mode & S_IFMT);
-}
-
-static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
- struct au_branch *br)
-{
- int err;
- struct au_iattr ia;
- struct inode *h_inode;
- struct dentry *h_d;
- struct super_block *h_sb;
-
- err = 0;
- memset(&ia, -1, sizeof(ia));
- h_sb = h_dentry->d_sb;
- h_inode = NULL;
- if (d_is_positive(h_dentry)) {
- h_inode = d_inode(h_dentry);
- au_iattr_save(&ia, h_inode);
- } else if (au_test_nfs(h_sb) || au_test_fuse(h_sb))
- /* nfs d_revalidate may return 0 for negative dentry */
- /* fuse d_revalidate always return 0 for negative dentry */
- goto out;
-
- /* main purpose is namei.c:cached_lookup() and d_revalidate */
- h_d = vfsub_lkup_one(&h_dentry->d_name, h_parent);
- err = PTR_ERR(h_d);
- if (IS_ERR(h_d))
- goto out;
-
- err = 0;
- if (unlikely(h_d != h_dentry
- || d_inode(h_d) != h_inode
- || (h_inode && au_iattr_test(&ia, h_inode))))
- err = au_busy_or_stale();
- dput(h_d);
-
-out:
- AuTraceErr(err);
- return err;
-}
-
-int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
- struct dentry *h_parent, struct au_branch *br)
-{
- int err;
-
- err = 0;
- if (udba == AuOpt_UDBA_REVAL
- && !au_test_fs_remote(h_dentry->d_sb)) {
- IMustLock(h_dir);
- err = (d_inode(h_dentry->d_parent) != h_dir);
- } else if (udba != AuOpt_UDBA_NONE)
- err = au_h_verify_dentry(h_dentry, h_parent, br);
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent)
-{
- int err;
- aufs_bindex_t new_bindex, bindex, bend, bwh, bdiropq;
- struct au_hdentry tmp, *p, *q;
- struct au_dinfo *dinfo;
- struct super_block *sb;
-
- DiMustWriteLock(dentry);
-
- sb = dentry->d_sb;
- dinfo = au_di(dentry);
- bend = dinfo->di_bend;
- bwh = dinfo->di_bwh;
- bdiropq = dinfo->di_bdiropq;
- p = dinfo->di_hdentry + dinfo->di_bstart;
- for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) {
- if (!p->hd_dentry)
- continue;
-
- new_bindex = au_br_index(sb, p->hd_id);
- if (new_bindex == bindex)
- continue;
-
- if (dinfo->di_bwh == bindex)
- bwh = new_bindex;
- if (dinfo->di_bdiropq == bindex)
- bdiropq = new_bindex;
- if (new_bindex < 0) {
- au_hdput(p);
- p->hd_dentry = NULL;
- continue;
- }
-
- /* swap two lower dentries, and loop again */
- q = dinfo->di_hdentry + new_bindex;
- tmp = *q;
- *q = *p;
- *p = tmp;
- if (tmp.hd_dentry) {
- bindex--;
- p--;
- }
- }
-
- dinfo->di_bwh = -1;
- if (bwh >= 0 && bwh <= au_sbend(sb) && au_sbr_whable(sb, bwh))
- dinfo->di_bwh = bwh;
-
- dinfo->di_bdiropq = -1;
- if (bdiropq >= 0
- && bdiropq <= au_sbend(sb)
- && au_sbr_whable(sb, bdiropq))
- dinfo->di_bdiropq = bdiropq;
-
- err = -EIO;
- dinfo->di_bstart = -1;
- dinfo->di_bend = -1;
- bend = au_dbend(parent);
- p = dinfo->di_hdentry;
- for (bindex = 0; bindex <= bend; bindex++, p++)
- if (p->hd_dentry) {
- dinfo->di_bstart = bindex;
- break;
- }
-
- if (dinfo->di_bstart >= 0) {
- p = dinfo->di_hdentry + bend;
- for (bindex = bend; bindex >= 0; bindex--, p--)
- if (p->hd_dentry) {
- dinfo->di_bend = bindex;
- err = 0;
- break;
- }
- }
-
- return err;
-}
-
-static void au_do_hide(struct dentry *dentry)
-{
- struct inode *inode;
-
- if (d_really_is_positive(dentry)) {
- inode = d_inode(dentry);
- if (!d_is_dir(dentry)) {
- if (inode->i_nlink && !d_unhashed(dentry))
- drop_nlink(inode);
- } else {
- clear_nlink(inode);
- /* stop next lookup */
- inode->i_flags |= S_DEAD;
- }
- smp_mb(); /* necessary? */
- }
- d_drop(dentry);
-}
-
-static int au_hide_children(struct dentry *parent)
-{
- int err, i, j, ndentry;
- struct au_dcsub_pages dpages;
- struct au_dpage *dpage;
- struct dentry *dentry;
-
- err = au_dpages_init(&dpages, GFP_NOFS);
- if (unlikely(err))
- goto out;
- err = au_dcsub_pages(&dpages, parent, NULL, NULL);
- if (unlikely(err))
- goto out_dpages;
-
- /* in reverse order */
- for (i = dpages.ndpage - 1; i >= 0; i--) {
- dpage = dpages.dpages + i;
- ndentry = dpage->ndentry;
- for (j = ndentry - 1; j >= 0; j--) {
- dentry = dpage->dentries[j];
- if (dentry != parent)
- au_do_hide(dentry);
- }
- }
-
-out_dpages:
- au_dpages_free(&dpages);
-out:
- return err;
-}
-
-static void au_hide(struct dentry *dentry)
-{
- int err;
-
- AuDbgDentry(dentry);
- if (d_is_dir(dentry)) {
- /* shrink_dcache_parent(dentry); */
- err = au_hide_children(dentry);
- if (unlikely(err))
- AuIOErr("%pd, failed hiding children, ignored %d\n",
- dentry, err);
- }
- au_do_hide(dentry);
-}
-
-/*
- * By adding a dirty branch, a cached dentry may be affected in various ways.
- *
- * a dirty branch is added
- * - on the top of layers
- * - in the middle of layers
- * - to the bottom of layers
- *
- * on the added branch there exists
- * - a whiteout
- * - a diropq
- * - a same named entry
- * + exist
- * * negative --> positive
- * * positive --> positive
- * - type is unchanged
- * - type is changed
- * + doesn't exist
- * * negative --> negative
- * * positive --> negative (rejected by au_br_del() for non-dir case)
- * - none
- */
-static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
- struct au_dinfo *tmp)
-{
- int err;
- aufs_bindex_t bindex, bend;
- struct {
- struct dentry *dentry;
- struct inode *inode;
- mode_t mode;
- } orig_h, tmp_h = {
- .dentry = NULL
- };
- struct au_hdentry *hd;
- struct inode *inode, *h_inode;
- struct dentry *h_dentry;
-
- err = 0;
- AuDebugOn(dinfo->di_bstart < 0);
- orig_h.mode = 0;
- orig_h.dentry = dinfo->di_hdentry[dinfo->di_bstart].hd_dentry;
- orig_h.inode = NULL;
- if (d_is_positive(orig_h.dentry)) {
- orig_h.inode = d_inode(orig_h.dentry);
- orig_h.mode = orig_h.inode->i_mode & S_IFMT;
- }
- if (tmp->di_bstart >= 0) {
- tmp_h.dentry = tmp->di_hdentry[tmp->di_bstart].hd_dentry;
- if (d_is_positive(tmp_h.dentry)) {
- tmp_h.inode = d_inode(tmp_h.dentry);
- tmp_h.mode = tmp_h.inode->i_mode & S_IFMT;
- }
- }
-
- inode = NULL;
- if (d_really_is_positive(dentry))
- inode = d_inode(dentry);
- if (!orig_h.inode) {
- AuDbg("nagative originally\n");
- if (inode) {
- au_hide(dentry);
- goto out;
- }
- AuDebugOn(inode);
- AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
- AuDebugOn(dinfo->di_bdiropq != -1);
-
- if (!tmp_h.inode) {
- AuDbg("negative --> negative\n");
- /* should have only one negative lower */
- if (tmp->di_bstart >= 0
- && tmp->di_bstart < dinfo->di_bstart) {
- AuDebugOn(tmp->di_bstart != tmp->di_bend);
- AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
- au_set_h_dptr(dentry, dinfo->di_bstart, NULL);
- au_di_cp(dinfo, tmp);
- hd = tmp->di_hdentry + tmp->di_bstart;
- au_set_h_dptr(dentry, tmp->di_bstart,
- dget(hd->hd_dentry));
- }
- au_dbg_verify_dinode(dentry);
- } else {
- AuDbg("negative --> positive\n");
- /*
- * similar to the behaviour of creating with bypassing
- * aufs.
- * unhash it in order to force an error in the
- * succeeding create operation.
- * we should not set S_DEAD here.
- */
- d_drop(dentry);
- /* au_di_swap(tmp, dinfo); */
- au_dbg_verify_dinode(dentry);
- }
- } else {
- AuDbg("positive originally\n");
- /* inode may be NULL */
- AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode);
- if (!tmp_h.inode) {
- AuDbg("positive --> negative\n");
- /* or bypassing aufs */
- au_hide(dentry);
- if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_bstart)
- dinfo->di_bwh = tmp->di_bwh;
- if (inode)
- err = au_refresh_hinode_self(inode);
- au_dbg_verify_dinode(dentry);
- } else if (orig_h.mode == tmp_h.mode) {
- AuDbg("positive --> positive, same type\n");
- if (!S_ISDIR(orig_h.mode)
- && dinfo->di_bstart > tmp->di_bstart) {
- /*
- * similar to the behaviour of removing and
- * creating.
- */
- au_hide(dentry);
- if (inode)
- err = au_refresh_hinode_self(inode);
- au_dbg_verify_dinode(dentry);
- } else {
- /* fill empty slots */
- if (dinfo->di_bstart > tmp->di_bstart)
- dinfo->di_bstart = tmp->di_bstart;
- if (dinfo->di_bend < tmp->di_bend)
- dinfo->di_bend = tmp->di_bend;
- dinfo->di_bwh = tmp->di_bwh;
- dinfo->di_bdiropq = tmp->di_bdiropq;
- hd = tmp->di_hdentry;
- bend = dinfo->di_bend;
- for (bindex = tmp->di_bstart; bindex <= bend;
- bindex++) {
- if (au_h_dptr(dentry, bindex))
- continue;
- h_dentry = hd[bindex].hd_dentry;
- if (!h_dentry)
- continue;
- AuDebugOn(d_is_negative(h_dentry));
- h_inode = d_inode(h_dentry);
- AuDebugOn(orig_h.mode
- != (h_inode->i_mode
- & S_IFMT));
- au_set_h_dptr(dentry, bindex,
- dget(h_dentry));
- }
- err = au_refresh_hinode(inode, dentry);
- au_dbg_verify_dinode(dentry);
- }
- } else {
- AuDbg("positive --> positive, different type\n");
- /* similar to the behaviour of removing and creating */
- au_hide(dentry);
- if (inode)
- err = au_refresh_hinode_self(inode);
- au_dbg_verify_dinode(dentry);
- }
- }
-
-out:
- return err;
-}
-
-void au_refresh_dop(struct dentry *dentry, int force_reval)
-{
- const struct dentry_operations *dop
- = force_reval ? &aufs_dop : dentry->d_sb->s_d_op;
- static const unsigned int mask
- = DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE;
-
- BUILD_BUG_ON(sizeof(mask) != sizeof(dentry->d_flags));
-
- if (dentry->d_op == dop)
- return;
-
- AuDbg("%pd\n", dentry);
- spin_lock(&dentry->d_lock);
- if (dop == &aufs_dop)
- dentry->d_flags |= mask;
- else
- dentry->d_flags &= ~mask;
- dentry->d_op = dop;
- spin_unlock(&dentry->d_lock);
-}
-
-int au_refresh_dentry(struct dentry *dentry, struct dentry *parent)
-{
- int err, ebrange;
- unsigned int sigen;
- struct au_dinfo *dinfo, *tmp;
- struct super_block *sb;
- struct inode *inode;
-
- DiMustWriteLock(dentry);
- AuDebugOn(IS_ROOT(dentry));
- AuDebugOn(d_really_is_negative(parent));
-
- sb = dentry->d_sb;
- sigen = au_sigen(sb);
- err = au_digen_test(parent, sigen);
- if (unlikely(err))
- goto out;
-
- dinfo = au_di(dentry);
- err = au_di_realloc(dinfo, au_sbend(sb) + 1);
- if (unlikely(err))
- goto out;
- ebrange = au_dbrange_test(dentry);
- if (!ebrange)
- ebrange = au_do_refresh_hdentry(dentry, parent);
-
- if (d_unhashed(dentry) || ebrange /* || dinfo->di_tmpfile */) {
- AuDebugOn(au_dbstart(dentry) < 0 && au_dbend(dentry) >= 0);
- if (d_really_is_positive(dentry)) {
- inode = d_inode(dentry);
- err = au_refresh_hinode_self(inode);
- }
- au_dbg_verify_dinode(dentry);
- if (!err)
- goto out_dgen; /* success */
- goto out;
- }
-
- /* temporary dinfo */
- AuDbgDentry(dentry);
- err = -ENOMEM;
- tmp = au_di_alloc(sb, AuLsc_DI_TMP);
- if (unlikely(!tmp))
- goto out;
- au_di_swap(tmp, dinfo);
- /* returns the number of positive dentries */
- /*
- * if current working dir is removed, it returns an error.
- * but the dentry is legal.
- */
- err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0);
- AuDbgDentry(dentry);
- au_di_swap(tmp, dinfo);
- if (err == -ENOENT)
- err = 0;
- if (err >= 0) {
- /* compare/refresh by dinfo */
- AuDbgDentry(dentry);
- err = au_refresh_by_dinfo(dentry, dinfo, tmp);
- au_dbg_verify_dinode(dentry);
- AuTraceErr(err);
- }
- au_rw_write_unlock(&tmp->di_rwsem);
- au_di_free(tmp);
- if (unlikely(err))
- goto out;
-
-out_dgen:
- au_update_digen(dentry);
-out:
- if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) {
- AuIOErr("failed refreshing %pd, %d\n", dentry, err);
- AuDbgDentry(dentry);
- }
- AuTraceErr(err);
- return err;
-}
-
-static int au_do_h_d_reval(struct dentry *h_dentry, unsigned int flags,
- struct dentry *dentry, aufs_bindex_t bindex)
-{
- int err, valid;
-
- err = 0;
- if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE))
- goto out;
-
- AuDbg("b%d\n", bindex);
- /*
- * gave up supporting LOOKUP_CREATE/OPEN for lower fs,
- * due to whiteout and branch permission.
- */
- flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
- | LOOKUP_FOLLOW | LOOKUP_EXCL);
- /* it may return tri-state */
- valid = h_dentry->d_op->d_revalidate(h_dentry, flags);
-
- if (unlikely(valid < 0))
- err = valid;
- else if (!valid)
- err = -EINVAL;
-
-out:
- AuTraceErr(err);
- return err;
-}
-
-/* todo: remove this */
-static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
- unsigned int flags, int do_udba)
-{
- int err;
- umode_t mode, h_mode;
- aufs_bindex_t bindex, btail, bstart, ibs, ibe;
- unsigned char plus, unhashed, is_root, h_plus, h_nfs, tmpfile;
- struct inode *h_inode, *h_cached_inode;
- struct dentry *h_dentry;
- struct qstr *name, *h_name;
-
- err = 0;
- plus = 0;
- mode = 0;
- ibs = -1;
- ibe = -1;
- unhashed = !!d_unhashed(dentry);
- is_root = !!IS_ROOT(dentry);
- name = &dentry->d_name;
- tmpfile = au_di(dentry)->di_tmpfile;
-
- /*
- * Theoretically, REVAL test should be unnecessary in case of
- * {FS,I}NOTIFY.
- * But {fs,i}notify doesn't fire some necessary events,
- * IN_ATTRIB for atime/nlink/pageio
- * Let's do REVAL test too.
- */
- if (do_udba && inode) {
- mode = (inode->i_mode & S_IFMT);
- plus = (inode->i_nlink > 0);
- ibs = au_ibstart(inode);
- ibe = au_ibend(inode);
- }
-
- bstart = au_dbstart(dentry);
- btail = bstart;
- if (inode && S_ISDIR(inode->i_mode))
- btail = au_dbtaildir(dentry);
- for (bindex = bstart; bindex <= btail; bindex++) {
- h_dentry = au_h_dptr(dentry, bindex);
- if (!h_dentry)
- continue;
-
- AuDbg("b%d, %pd\n", bindex, h_dentry);
- h_nfs = !!au_test_nfs(h_dentry->d_sb);
- spin_lock(&h_dentry->d_lock);
- h_name = &h_dentry->d_name;
- if (unlikely(do_udba
- && !is_root
- && ((!h_nfs
- && (unhashed != !!d_unhashed(h_dentry)
- || (!tmpfile
- && !au_qstreq(name, h_name))
- ))
- || (h_nfs
- && !(flags & LOOKUP_OPEN)
- && (h_dentry->d_flags
- & DCACHE_NFSFS_RENAMED)))
- )) {
- int h_unhashed;
-
- h_unhashed = d_unhashed(h_dentry);
- spin_unlock(&h_dentry->d_lock);
- AuDbg("unhash 0x%x 0x%x, %pd %pd\n",
- unhashed, h_unhashed, dentry, h_dentry);
- goto err;
- }
- spin_unlock(&h_dentry->d_lock);
-
- err = au_do_h_d_reval(h_dentry, flags, dentry, bindex);
- if (unlikely(err))
- /* do not goto err, to keep the errno */
- break;
-
- /* todo: plink too? */
- if (!do_udba)
- continue;
-
- /* UDBA tests */
- if (unlikely(!!inode != d_is_positive(h_dentry)))
- goto err;
-
- h_inode = NULL;
- if (d_is_positive(h_dentry))
- h_inode = d_inode(h_dentry);
- h_plus = plus;
- h_mode = mode;
- h_cached_inode = h_inode;
- if (h_inode) {
- h_mode = (h_inode->i_mode & S_IFMT);
- h_plus = (h_inode->i_nlink > 0);
- }
- if (inode && ibs <= bindex && bindex <= ibe)
- h_cached_inode = au_h_iptr(inode, bindex);
-
- if (!h_nfs) {
- if (unlikely(plus != h_plus && !tmpfile))
- goto err;
- } else {
- if (unlikely(!(h_dentry->d_flags & DCACHE_NFSFS_RENAMED)
- && !is_root
- && !IS_ROOT(h_dentry)
- && unhashed != d_unhashed(h_dentry)))
- goto err;
- }
- if (unlikely(mode != h_mode
- || h_cached_inode != h_inode))
- goto err;
- continue;
-
-err:
- err = -EINVAL;
- break;
- }
-
- AuTraceErr(err);
- return err;
-}
-
-/* todo: consolidate with do_refresh() and au_reval_for_attr() */
-static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
-{
- int err;
- struct dentry *parent;
-
- if (!au_digen_test(dentry, sigen))
- return 0;
-
- parent = dget_parent(dentry);
- di_read_lock_parent(parent, AuLock_IR);
- AuDebugOn(au_digen_test(parent, sigen));
- au_dbg_verify_gen(parent, sigen);
- err = au_refresh_dentry(dentry, parent);
- di_read_unlock(parent, AuLock_IR);
- dput(parent);
- AuTraceErr(err);
- return err;
-}
-
-int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
-{
- int err;
- struct dentry *d, *parent;
-
- if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR))
- return simple_reval_dpath(dentry, sigen);
-
- /* slow loop, keep it simple and stupid */
- /* cf: au_cpup_dirs() */
- err = 0;
- parent = NULL;
- while (au_digen_test(dentry, sigen)) {
- d = dentry;
- while (1) {
- dput(parent);
- parent = dget_parent(d);
- if (!au_digen_test(parent, sigen))
- break;
- d = parent;
- }
-
- if (d != dentry)
- di_write_lock_child2(d);
-
- /* someone might update our dentry while we were sleeping */
- if (au_digen_test(d, sigen)) {
- /*
- * todo: consolidate with simple_reval_dpath(),
- * do_refresh() and au_reval_for_attr().
- */
- di_read_lock_parent(parent, AuLock_IR);
- err = au_refresh_dentry(d, parent);
- di_read_unlock(parent, AuLock_IR);
- }
-
- if (d != dentry)
- di_write_unlock(d);
- dput(parent);
- if (unlikely(err))
- break;
- }
-
- return err;
-}
-
-/*
- * if valid returns 1, otherwise 0.
- */
-static int aufs_d_revalidate(struct dentry *dentry, unsigned int flags)
-{
- int valid, err;
- unsigned int sigen;
- unsigned char do_udba;
- struct super_block *sb;
- struct inode *inode;
-
- /* todo: support rcu-walk? */
- if (flags & LOOKUP_RCU)
- return -ECHILD;
-
- valid = 0;
- if (unlikely(!au_di(dentry)))
- goto out;
-
- valid = 1;
- sb = dentry->d_sb;
- /*
- * todo: very ugly
- * i_mutex of parent dir may be held,
- * but we should not return 'invalid' due to busy.
- */
- err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM);
- if (unlikely(err)) {
- valid = err;
- AuTraceErr(err);
- goto out;
- }
- inode = NULL;
- if (d_really_is_positive(dentry))
- inode = d_inode(dentry);
- if (unlikely(inode && is_bad_inode(inode))) {
- err = -EINVAL;
- AuTraceErr(err);
- goto out_dgrade;
- }
- if (unlikely(au_dbrange_test(dentry))) {
- err = -EINVAL;
- AuTraceErr(err);
- goto out_dgrade;
- }
-
- sigen = au_sigen(sb);
- if (au_digen_test(dentry, sigen)) {
- AuDebugOn(IS_ROOT(dentry));
- err = au_reval_dpath(dentry, sigen);
- if (unlikely(err)) {
- AuTraceErr(err);
- goto out_dgrade;
- }
- }
- di_downgrade_lock(dentry, AuLock_IR);
-
- err = -EINVAL;
- if (!(flags & (LOOKUP_OPEN | LOOKUP_EMPTY))
- && inode
- && !(inode->i_state && I_LINKABLE)
- && (IS_DEADDIR(inode) || !inode->i_nlink)) {
- AuTraceErr(err);
- goto out_inval;
- }
-
- do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
- if (do_udba && inode) {
- aufs_bindex_t bstart = au_ibstart(inode);
- struct inode *h_inode;
-
- if (bstart >= 0) {
- h_inode = au_h_iptr(inode, bstart);
- if (h_inode && au_test_higen(inode, h_inode)) {
- AuTraceErr(err);
- goto out_inval;
- }
- }
- }
-
- err = h_d_revalidate(dentry, inode, flags, do_udba);
- if (unlikely(!err && do_udba && au_dbstart(dentry) < 0)) {
- err = -EIO;
- AuDbg("both of real entry and whiteout found, %p, err %d\n",
- dentry, err);
- }
- goto out_inval;
-
-out_dgrade:
- di_downgrade_lock(dentry, AuLock_IR);
-out_inval:
- aufs_read_unlock(dentry, AuLock_IR);
- AuTraceErr(err);
- valid = !err;
-out:
- if (!valid) {
- AuDbg("%pd invalid, %d\n", dentry, valid);
- d_drop(dentry);
- }
- return valid;
-}
-
-static void aufs_d_release(struct dentry *dentry)
-{
- if (au_di(dentry)) {
- au_di_fin(dentry);
- au_hn_di_reinit(dentry);
- }
-}
-
-const struct dentry_operations aufs_dop = {
- .d_revalidate = aufs_d_revalidate,
- .d_weak_revalidate = aufs_d_revalidate,
- .d_release = aufs_d_release
-};
-
-/* aufs_dop without d_revalidate */
-const struct dentry_operations aufs_dop_noreval = {
- .d_release = aufs_d_release
-};
diff --git a/fs/aufs/dentry.h b/fs/aufs/dentry.h
deleted file mode 100644
index c794adf59..000000000
--- a/fs/aufs/dentry.h
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * lookup and dentry operations
- */
-
-#ifndef __AUFS_DENTRY_H__
-#define __AUFS_DENTRY_H__
-
-#ifdef __KERNEL__
-
-#include <linux/dcache.h>
-#include "rwsem.h"
-
-struct au_hdentry {
- struct dentry *hd_dentry;
- aufs_bindex_t hd_id;
-};
-
-struct au_dinfo {
- atomic_t di_generation;
-
- struct au_rwsem di_rwsem;
- aufs_bindex_t di_bstart, di_bend, di_bwh, di_bdiropq;
- unsigned char di_tmpfile; /* to allow the different name */
- struct au_hdentry *di_hdentry;
-} ____cacheline_aligned_in_smp;
-
-/* ---------------------------------------------------------------------- */
-
-/* dentry.c */
-extern const struct dentry_operations aufs_dop, aufs_dop_noreval;
-struct au_branch;
-struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent);
-int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
- struct dentry *h_parent, struct au_branch *br);
-
-int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type);
-int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh);
-int au_refresh_dentry(struct dentry *dentry, struct dentry *parent);
-int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
-void au_refresh_dop(struct dentry *dentry, int force_reval);
-
-/* dinfo.c */
-void au_di_init_once(void *_di);
-struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc);
-void au_di_free(struct au_dinfo *dinfo);
-void au_di_swap(struct au_dinfo *a, struct au_dinfo *b);
-void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src);
-int au_di_init(struct dentry *dentry);
-void au_di_fin(struct dentry *dentry);
-int au_di_realloc(struct au_dinfo *dinfo, int nbr);
-
-void di_read_lock(struct dentry *d, int flags, unsigned int lsc);
-void di_read_unlock(struct dentry *d, int flags);
-void di_downgrade_lock(struct dentry *d, int flags);
-void di_write_lock(struct dentry *d, unsigned int lsc);
-void di_write_unlock(struct dentry *d);
-void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir);
-void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir);
-void di_write_unlock2(struct dentry *d1, struct dentry *d2);
-
-struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
-struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex);
-aufs_bindex_t au_dbtail(struct dentry *dentry);
-aufs_bindex_t au_dbtaildir(struct dentry *dentry);
-
-void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
- struct dentry *h_dentry);
-int au_digen_test(struct dentry *dentry, unsigned int sigen);
-int au_dbrange_test(struct dentry *dentry);
-void au_update_digen(struct dentry *dentry);
-void au_update_dbrange(struct dentry *dentry, int do_put_zero);
-void au_update_dbstart(struct dentry *dentry);
-void au_update_dbend(struct dentry *dentry);
-int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
-
-/* ---------------------------------------------------------------------- */
-
-static inline struct au_dinfo *au_di(struct dentry *dentry)
-{
- return dentry->d_fsdata;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* lock subclass for dinfo */
-enum {
- AuLsc_DI_CHILD, /* child first */
- AuLsc_DI_CHILD2, /* rename(2), link(2), and cpup at hnotify */
- AuLsc_DI_CHILD3, /* copyup dirs */
- AuLsc_DI_PARENT,
- AuLsc_DI_PARENT2,
- AuLsc_DI_PARENT3,
- AuLsc_DI_TMP /* temp for replacing dinfo */
-};
-
-/*
- * di_read_lock_child, di_write_lock_child,
- * di_read_lock_child2, di_write_lock_child2,
- * di_read_lock_child3, di_write_lock_child3,
- * di_read_lock_parent, di_write_lock_parent,
- * di_read_lock_parent2, di_write_lock_parent2,
- * di_read_lock_parent3, di_write_lock_parent3,
- */
-#define AuReadLockFunc(name, lsc) \
-static inline void di_read_lock_##name(struct dentry *d, int flags) \
-{ di_read_lock(d, flags, AuLsc_DI_##lsc); }
-
-#define AuWriteLockFunc(name, lsc) \
-static inline void di_write_lock_##name(struct dentry *d) \
-{ di_write_lock(d, AuLsc_DI_##lsc); }
-
-#define AuRWLockFuncs(name, lsc) \
- AuReadLockFunc(name, lsc) \
- AuWriteLockFunc(name, lsc)
-
-AuRWLockFuncs(child, CHILD);
-AuRWLockFuncs(child2, CHILD2);
-AuRWLockFuncs(child3, CHILD3);
-AuRWLockFuncs(parent, PARENT);
-AuRWLockFuncs(parent2, PARENT2);
-AuRWLockFuncs(parent3, PARENT3);
-
-#undef AuReadLockFunc
-#undef AuWriteLockFunc
-#undef AuRWLockFuncs
-
-#define DiMustNoWaiters(d) AuRwMustNoWaiters(&au_di(d)->di_rwsem)
-#define DiMustAnyLock(d) AuRwMustAnyLock(&au_di(d)->di_rwsem)
-#define DiMustWriteLock(d) AuRwMustWriteLock(&au_di(d)->di_rwsem)
-
-/* ---------------------------------------------------------------------- */
-
-/* todo: memory barrier? */
-static inline unsigned int au_digen(struct dentry *d)
-{
- return atomic_read(&au_di(d)->di_generation);
-}
-
-static inline void au_h_dentry_init(struct au_hdentry *hdentry)
-{
- hdentry->hd_dentry = NULL;
-}
-
-static inline void au_hdput(struct au_hdentry *hd)
-{
- if (hd)
- dput(hd->hd_dentry);
-}
-
-static inline aufs_bindex_t au_dbstart(struct dentry *dentry)
-{
- DiMustAnyLock(dentry);
- return au_di(dentry)->di_bstart;
-}
-
-static inline aufs_bindex_t au_dbend(struct dentry *dentry)
-{
- DiMustAnyLock(dentry);
- return au_di(dentry)->di_bend;
-}
-
-static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
-{
- DiMustAnyLock(dentry);
- return au_di(dentry)->di_bwh;
-}
-
-static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
-{
- DiMustAnyLock(dentry);
- return au_di(dentry)->di_bdiropq;
-}
-
-/* todo: hard/soft set? */
-static inline void au_set_dbstart(struct dentry *dentry, aufs_bindex_t bindex)
-{
- DiMustWriteLock(dentry);
- au_di(dentry)->di_bstart = bindex;
-}
-
-static inline void au_set_dbend(struct dentry *dentry, aufs_bindex_t bindex)
-{
- DiMustWriteLock(dentry);
- au_di(dentry)->di_bend = bindex;
-}
-
-static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
-{
- DiMustWriteLock(dentry);
- /* dbwh can be outside of bstart - bend range */
- au_di(dentry)->di_bwh = bindex;
-}
-
-static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
-{
- DiMustWriteLock(dentry);
- au_di(dentry)->di_bdiropq = bindex;
-}
-
-/* ---------------------------------------------------------------------- */
-
-#ifdef CONFIG_AUFS_HNOTIFY
-static inline void au_digen_dec(struct dentry *d)
-{
- atomic_dec(&au_di(d)->di_generation);
-}
-
-static inline void au_hn_di_reinit(struct dentry *dentry)
-{
- dentry->d_fsdata = NULL;
-}
-#else
-AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused)
-#endif /* CONFIG_AUFS_HNOTIFY */
-
-#endif /* __KERNEL__ */
-#endif /* __AUFS_DENTRY_H__ */
diff --git a/fs/aufs/dinfo.c b/fs/aufs/dinfo.c
deleted file mode 100644
index ad6d045c4..000000000
--- a/fs/aufs/dinfo.c
+++ /dev/null
@@ -1,537 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * dentry private data
- */
-
-#include "aufs.h"
-
-void au_di_init_once(void *_dinfo)
-{
- struct au_dinfo *dinfo = _dinfo;
- static struct lock_class_key aufs_di;
-
- au_rw_init(&dinfo->di_rwsem);
- au_rw_class(&dinfo->di_rwsem, &aufs_di);
-}
-
-struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc)
-{
- struct au_dinfo *dinfo;
- int nbr, i;
-
- dinfo = au_cache_alloc_dinfo();
- if (unlikely(!dinfo))
- goto out;
-
- nbr = au_sbend(sb) + 1;
- if (nbr <= 0)
- nbr = 1;
- dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS);
- if (dinfo->di_hdentry) {
- au_rw_write_lock_nested(&dinfo->di_rwsem, lsc);
- dinfo->di_bstart = -1;
- dinfo->di_bend = -1;
- dinfo->di_bwh = -1;
- dinfo->di_bdiropq = -1;
- dinfo->di_tmpfile = 0;
- for (i = 0; i < nbr; i++)
- dinfo->di_hdentry[i].hd_id = -1;
- goto out;
- }
-
- au_cache_free_dinfo(dinfo);
- dinfo = NULL;
-
-out:
- return dinfo;
-}
-
-void au_di_free(struct au_dinfo *dinfo)
-{
- struct au_hdentry *p;
- aufs_bindex_t bend, bindex;
-
- /* dentry may not be revalidated */
- bindex = dinfo->di_bstart;
- if (bindex >= 0) {
- bend = dinfo->di_bend;
- p = dinfo->di_hdentry + bindex;
- while (bindex++ <= bend)
- au_hdput(p++);
- }
- kfree(dinfo->di_hdentry);
- au_cache_free_dinfo(dinfo);
-}
-
-void au_di_swap(struct au_dinfo *a, struct au_dinfo *b)
-{
- struct au_hdentry *p;
- aufs_bindex_t bi;
-
- AuRwMustWriteLock(&a->di_rwsem);
- AuRwMustWriteLock(&b->di_rwsem);
-
-#define DiSwap(v, name) \
- do { \
- v = a->di_##name; \
- a->di_##name = b->di_##name; \
- b->di_##name = v; \
- } while (0)
-
- DiSwap(p, hdentry);
- DiSwap(bi, bstart);
- DiSwap(bi, bend);
- DiSwap(bi, bwh);
- DiSwap(bi, bdiropq);
- /* smp_mb(); */
-
-#undef DiSwap
-}
-
-void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src)
-{
- AuRwMustWriteLock(&dst->di_rwsem);
- AuRwMustWriteLock(&src->di_rwsem);
-
- dst->di_bstart = src->di_bstart;
- dst->di_bend = src->di_bend;
- dst->di_bwh = src->di_bwh;
- dst->di_bdiropq = src->di_bdiropq;
- /* smp_mb(); */
-}
-
-int au_di_init(struct dentry *dentry)
-{
- int err;
- struct super_block *sb;
- struct au_dinfo *dinfo;
-
- err = 0;
- sb = dentry->d_sb;
- dinfo = au_di_alloc(sb, AuLsc_DI_CHILD);
- if (dinfo) {
- atomic_set(&dinfo->di_generation, au_sigen(sb));
- /* smp_mb(); */ /* atomic_set */
- dentry->d_fsdata = dinfo;
- } else
- err = -ENOMEM;
-
- return err;
-}
-
-void au_di_fin(struct dentry *dentry)
-{
- struct au_dinfo *dinfo;
-
- dinfo = au_di(dentry);
- AuRwDestroy(&dinfo->di_rwsem);
- au_di_free(dinfo);
-}
-
-int au_di_realloc(struct au_dinfo *dinfo, int nbr)
-{
- int err, sz;
- struct au_hdentry *hdp;
-
- AuRwMustWriteLock(&dinfo->di_rwsem);
-
- err = -ENOMEM;
- sz = sizeof(*hdp) * (dinfo->di_bend + 1);
- if (!sz)
- sz = sizeof(*hdp);
- hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS);
- if (hdp) {
- dinfo->di_hdentry = hdp;
- err = 0;
- }
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static void do_ii_write_lock(struct inode *inode, unsigned int lsc)
-{
- switch (lsc) {
- case AuLsc_DI_CHILD:
- ii_write_lock_child(inode);
- break;
- case AuLsc_DI_CHILD2:
- ii_write_lock_child2(inode);
- break;
- case AuLsc_DI_CHILD3:
- ii_write_lock_child3(inode);
- break;
- case AuLsc_DI_PARENT:
- ii_write_lock_parent(inode);
- break;
- case AuLsc_DI_PARENT2:
- ii_write_lock_parent2(inode);
- break;
- case AuLsc_DI_PARENT3:
- ii_write_lock_parent3(inode);
- break;
- default:
- BUG();
- }
-}
-
-static void do_ii_read_lock(struct inode *inode, unsigned int lsc)
-{
- switch (lsc) {
- case AuLsc_DI_CHILD:
- ii_read_lock_child(inode);
- break;
- case AuLsc_DI_CHILD2:
- ii_read_lock_child2(inode);
- break;
- case AuLsc_DI_CHILD3:
- ii_read_lock_child3(inode);
- break;
- case AuLsc_DI_PARENT:
- ii_read_lock_parent(inode);
- break;
- case AuLsc_DI_PARENT2:
- ii_read_lock_parent2(inode);
- break;
- case AuLsc_DI_PARENT3:
- ii_read_lock_parent3(inode);
- break;
- default:
- BUG();
- }
-}
-
-void di_read_lock(struct dentry *d, int flags, unsigned int lsc)
-{
- struct inode *inode;
-
- au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc);
- if (d_really_is_positive(d)) {
- inode = d_inode(d);
- if (au_ftest_lock(flags, IW))
- do_ii_write_lock(inode, lsc);
- else if (au_ftest_lock(flags, IR))
- do_ii_read_lock(inode, lsc);
- }
-}
-
-void di_read_unlock(struct dentry *d, int flags)
-{
- struct inode *inode;
-
- if (d_really_is_positive(d)) {
- inode = d_inode(d);
- if (au_ftest_lock(flags, IW)) {
- au_dbg_verify_dinode(d);
- ii_write_unlock(inode);
- } else if (au_ftest_lock(flags, IR)) {
- au_dbg_verify_dinode(d);
- ii_read_unlock(inode);
- }
- }
- au_rw_read_unlock(&au_di(d)->di_rwsem);
-}
-
-void di_downgrade_lock(struct dentry *d, int flags)
-{
- if (d_really_is_positive(d) && au_ftest_lock(flags, IR))
- ii_downgrade_lock(d_inode(d));
- au_rw_dgrade_lock(&au_di(d)->di_rwsem);
-}
-
-void di_write_lock(struct dentry *d, unsigned int lsc)
-{
- au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc);
- if (d_really_is_positive(d))
- do_ii_write_lock(d_inode(d), lsc);
-}
-
-void di_write_unlock(struct dentry *d)
-{
- au_dbg_verify_dinode(d);
- if (d_really_is_positive(d))
- ii_write_unlock(d_inode(d));
- au_rw_write_unlock(&au_di(d)->di_rwsem);
-}
-
-void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir)
-{
- AuDebugOn(d1 == d2
- || d_inode(d1) == d_inode(d2)
- || d1->d_sb != d2->d_sb);
-
- if (isdir && au_test_subdir(d1, d2)) {
- di_write_lock_child(d1);
- di_write_lock_child2(d2);
- } else {
- /* there should be no races */
- di_write_lock_child(d2);
- di_write_lock_child2(d1);
- }
-}
-
-void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir)
-{
- AuDebugOn(d1 == d2
- || d_inode(d1) == d_inode(d2)
- || d1->d_sb != d2->d_sb);
-
- if (isdir && au_test_subdir(d1, d2)) {
- di_write_lock_parent(d1);
- di_write_lock_parent2(d2);
- } else {
- /* there should be no races */
- di_write_lock_parent(d2);
- di_write_lock_parent2(d1);
- }
-}
-
-void di_write_unlock2(struct dentry *d1, struct dentry *d2)
-{
- di_write_unlock(d1);
- if (d_inode(d1) == d_inode(d2))
- au_rw_write_unlock(&au_di(d2)->di_rwsem);
- else
- di_write_unlock(d2);
-}
-
-/* ---------------------------------------------------------------------- */
-
-struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex)
-{
- struct dentry *d;
-
- DiMustAnyLock(dentry);
-
- if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
- return NULL;
- AuDebugOn(bindex < 0);
- d = au_di(dentry)->di_hdentry[0 + bindex].hd_dentry;
- AuDebugOn(d && au_dcount(d) <= 0);
- return d;
-}
-
-/*
- * extended version of au_h_dptr().
- * returns a hashed and positive (or linkable) h_dentry in bindex, NULL, or
- * error.
- */
-struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex)
-{
- struct dentry *h_dentry;
- struct inode *inode, *h_inode;
-
- AuDebugOn(d_really_is_negative(dentry));
-
- h_dentry = NULL;
- if (au_dbstart(dentry) <= bindex
- && bindex <= au_dbend(dentry))
- h_dentry = au_h_dptr(dentry, bindex);
- if (h_dentry && !au_d_linkable(h_dentry)) {
- dget(h_dentry);
- goto out; /* success */
- }
-
- inode = d_inode(dentry);
- AuDebugOn(bindex < au_ibstart(inode));
- AuDebugOn(au_ibend(inode) < bindex);
- h_inode = au_h_iptr(inode, bindex);
- h_dentry = d_find_alias(h_inode);
- if (h_dentry) {
- if (!IS_ERR(h_dentry)) {
- if (!au_d_linkable(h_dentry))
- goto out; /* success */
- dput(h_dentry);
- } else
- goto out;
- }
-
- if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) {
- h_dentry = au_plink_lkup(inode, bindex);
- AuDebugOn(!h_dentry);
- if (!IS_ERR(h_dentry)) {
- if (!au_d_hashed_positive(h_dentry))
- goto out; /* success */
- dput(h_dentry);
- h_dentry = NULL;
- }
- }
-
-out:
- AuDbgDentry(h_dentry);
- return h_dentry;
-}
-
-aufs_bindex_t au_dbtail(struct dentry *dentry)
-{
- aufs_bindex_t bend, bwh;
-
- bend = au_dbend(dentry);
- if (0 <= bend) {
- bwh = au_dbwh(dentry);
- if (!bwh)
- return bwh;
- if (0 < bwh && bwh < bend)
- return bwh - 1;
- }
- return bend;
-}
-
-aufs_bindex_t au_dbtaildir(struct dentry *dentry)
-{
- aufs_bindex_t bend, bopq;
-
- bend = au_dbtail(dentry);
- if (0 <= bend) {
- bopq = au_dbdiropq(dentry);
- if (0 <= bopq && bopq < bend)
- bend = bopq;
- }
- return bend;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
- struct dentry *h_dentry)
-{
- struct au_hdentry *hd = au_di(dentry)->di_hdentry + bindex;
- struct au_branch *br;
-
- DiMustWriteLock(dentry);
-
- au_hdput(hd);
- hd->hd_dentry = h_dentry;
- if (h_dentry) {
- br = au_sbr(dentry->d_sb, bindex);
- hd->hd_id = br->br_id;
- }
-}
-
-int au_dbrange_test(struct dentry *dentry)
-{
- int err;
- aufs_bindex_t bstart, bend;
-
- err = 0;
- bstart = au_dbstart(dentry);
- bend = au_dbend(dentry);
- if (bstart >= 0)
- AuDebugOn(bend < 0 && bstart > bend);
- else {
- err = -EIO;
- AuDebugOn(bend >= 0);
- }
-
- return err;
-}
-
-int au_digen_test(struct dentry *dentry, unsigned int sigen)
-{
- int err;
-
- err = 0;
- if (unlikely(au_digen(dentry) != sigen
- || au_iigen_test(d_inode(dentry), sigen)))
- err = -EIO;
-
- return err;
-}
-
-void au_update_digen(struct dentry *dentry)
-{
- atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb));
- /* smp_mb(); */ /* atomic_set */
-}
-
-void au_update_dbrange(struct dentry *dentry, int do_put_zero)
-{
- struct au_dinfo *dinfo;
- struct dentry *h_d;
- struct au_hdentry *hdp;
-
- DiMustWriteLock(dentry);
-
- dinfo = au_di(dentry);
- if (!dinfo || dinfo->di_bstart < 0)
- return;
-
- hdp = dinfo->di_hdentry;
- if (do_put_zero) {
- aufs_bindex_t bindex, bend;
-
- bend = dinfo->di_bend;
- for (bindex = dinfo->di_bstart; bindex <= bend; bindex++) {
- h_d = hdp[0 + bindex].hd_dentry;
- if (h_d && d_is_negative(h_d))
- au_set_h_dptr(dentry, bindex, NULL);
- }
- }
-
- dinfo->di_bstart = -1;
- while (++dinfo->di_bstart <= dinfo->di_bend)
- if (hdp[0 + dinfo->di_bstart].hd_dentry)
- break;
- if (dinfo->di_bstart > dinfo->di_bend) {
- dinfo->di_bstart = -1;
- dinfo->di_bend = -1;
- return;
- }
-
- dinfo->di_bend++;
- while (0 <= --dinfo->di_bend)
- if (hdp[0 + dinfo->di_bend].hd_dentry)
- break;
- AuDebugOn(dinfo->di_bstart > dinfo->di_bend || dinfo->di_bend < 0);
-}
-
-void au_update_dbstart(struct dentry *dentry)
-{
- aufs_bindex_t bindex, bend;
- struct dentry *h_dentry;
-
- bend = au_dbend(dentry);
- for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
- h_dentry = au_h_dptr(dentry, bindex);
- if (!h_dentry)
- continue;
- if (d_is_positive(h_dentry)) {
- au_set_dbstart(dentry, bindex);
- return;
- }
- au_set_h_dptr(dentry, bindex, NULL);
- }
-}
-
-void au_update_dbend(struct dentry *dentry)
-{
- aufs_bindex_t bindex, bstart;
- struct dentry *h_dentry;
-
- bstart = au_dbstart(dentry);
- for (bindex = au_dbend(dentry); bindex >= bstart; bindex--) {
- h_dentry = au_h_dptr(dentry, bindex);
- if (!h_dentry)
- continue;
- if (d_is_positive(h_dentry)) {
- au_set_dbend(dentry, bindex);
- return;
- }
- au_set_h_dptr(dentry, bindex, NULL);
- }
-}
-
-int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry)
-{
- aufs_bindex_t bindex, bend;
-
- bend = au_dbend(dentry);
- for (bindex = au_dbstart(dentry); bindex <= bend; bindex++)
- if (au_h_dptr(dentry, bindex) == h_dentry)
- return bindex;
- return -1;
-}
diff --git a/fs/aufs/dir.c b/fs/aufs/dir.c
deleted file mode 100644
index a994e0862..000000000
--- a/fs/aufs/dir.c
+++ /dev/null
@@ -1,745 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * directory operations
- */
-
-#include <linux/fs_stack.h>
-#include "aufs.h"
-
-void au_add_nlink(struct inode *dir, struct inode *h_dir)
-{
- unsigned int nlink;
-
- AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
-
- nlink = dir->i_nlink;
- nlink += h_dir->i_nlink - 2;
- if (h_dir->i_nlink < 2)
- nlink += 2;
- smp_mb(); /* for i_nlink */
- /* 0 can happen in revaliding */
- set_nlink(dir, nlink);
-}
-
-void au_sub_nlink(struct inode *dir, struct inode *h_dir)
-{
- unsigned int nlink;
-
- AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
-
- nlink = dir->i_nlink;
- nlink -= h_dir->i_nlink - 2;
- if (h_dir->i_nlink < 2)
- nlink -= 2;
- smp_mb(); /* for i_nlink */
- /* nlink == 0 means the branch-fs is broken */
- set_nlink(dir, nlink);
-}
-
-loff_t au_dir_size(struct file *file, struct dentry *dentry)
-{
- loff_t sz;
- aufs_bindex_t bindex, bend;
- struct file *h_file;
- struct dentry *h_dentry;
-
- sz = 0;
- if (file) {
- AuDebugOn(!d_is_dir(file->f_path.dentry));
-
- bend = au_fbend_dir(file);
- for (bindex = au_fbstart(file);
- bindex <= bend && sz < KMALLOC_MAX_SIZE;
- bindex++) {
- h_file = au_hf_dir(file, bindex);
- if (h_file && file_inode(h_file))
- sz += vfsub_f_size_read(h_file);
- }
- } else {
- AuDebugOn(!dentry);
- AuDebugOn(!d_is_dir(dentry));
-
- bend = au_dbtaildir(dentry);
- for (bindex = au_dbstart(dentry);
- bindex <= bend && sz < KMALLOC_MAX_SIZE;
- bindex++) {
- h_dentry = au_h_dptr(dentry, bindex);
- if (h_dentry && d_is_positive(h_dentry))
- sz += i_size_read(d_inode(h_dentry));
- }
- }
- if (sz < KMALLOC_MAX_SIZE)
- sz = roundup_pow_of_two(sz);
- if (sz > KMALLOC_MAX_SIZE)
- sz = KMALLOC_MAX_SIZE;
- else if (sz < NAME_MAX) {
- BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX);
- sz = AUFS_RDBLK_DEF;
- }
- return sz;
-}
-
-struct au_dir_ts_arg {
- struct dentry *dentry;
- aufs_bindex_t brid;
-};
-
-static void au_do_dir_ts(void *arg)
-{
- struct au_dir_ts_arg *a = arg;
- struct au_dtime dt;
- struct path h_path;
- struct inode *dir, *h_dir;
- struct super_block *sb;
- struct au_branch *br;
- struct au_hinode *hdir;
- int err;
- aufs_bindex_t bstart, bindex;
-
- sb = a->dentry->d_sb;
- if (d_really_is_negative(a->dentry))
- goto out;
- /* no dir->i_mutex lock */
- aufs_read_lock(a->dentry, AuLock_DW); /* noflush */
-
- dir = d_inode(a->dentry);
- bstart = au_ibstart(dir);
- bindex = au_br_index(sb, a->brid);
- if (bindex < bstart)
- goto out_unlock;
-
- br = au_sbr(sb, bindex);
- h_path.dentry = au_h_dptr(a->dentry, bindex);
- if (!h_path.dentry)
- goto out_unlock;
- h_path.mnt = au_br_mnt(br);
- au_dtime_store(&dt, a->dentry, &h_path);
-
- br = au_sbr(sb, bstart);
- if (!au_br_writable(br->br_perm))
- goto out_unlock;
- h_path.dentry = au_h_dptr(a->dentry, bstart);
- h_path.mnt = au_br_mnt(br);
- err = vfsub_mnt_want_write(h_path.mnt);
- if (err)
- goto out_unlock;
- hdir = au_hi(dir, bstart);
- au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
- h_dir = au_h_iptr(dir, bstart);
- if (h_dir->i_nlink
- && timespec_compare(&h_dir->i_mtime, &dt.dt_mtime) < 0) {
- dt.dt_h_path = h_path;
- au_dtime_revert(&dt);
- }
- au_hn_imtx_unlock(hdir);
- vfsub_mnt_drop_write(h_path.mnt);
- au_cpup_attr_timesizes(dir);
-
-out_unlock:
- aufs_read_unlock(a->dentry, AuLock_DW);
-out:
- dput(a->dentry);
- au_nwt_done(&au_sbi(sb)->si_nowait);
- kfree(arg);
-}
-
-void au_dir_ts(struct inode *dir, aufs_bindex_t bindex)
-{
- int perm, wkq_err;
- aufs_bindex_t bstart;
- struct au_dir_ts_arg *arg;
- struct dentry *dentry;
- struct super_block *sb;
-
- IMustLock(dir);
-
- dentry = d_find_any_alias(dir);
- AuDebugOn(!dentry);
- sb = dentry->d_sb;
- bstart = au_ibstart(dir);
- if (bstart == bindex) {
- au_cpup_attr_timesizes(dir);
- goto out;
- }
-
- perm = au_sbr_perm(sb, bstart);
- if (!au_br_writable(perm))
- goto out;
-
- arg = kmalloc(sizeof(*arg), GFP_NOFS);
- if (!arg)
- goto out;
-
- arg->dentry = dget(dentry); /* will be dput-ted by au_do_dir_ts() */
- arg->brid = au_sbr_id(sb, bindex);
- wkq_err = au_wkq_nowait(au_do_dir_ts, arg, sb, /*flags*/0);
- if (unlikely(wkq_err)) {
- pr_err("wkq %d\n", wkq_err);
- dput(dentry);
- kfree(arg);
- }
-
-out:
- dput(dentry);
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int reopen_dir(struct file *file)
-{
- int err;
- unsigned int flags;
- aufs_bindex_t bindex, btail, bstart;
- struct dentry *dentry, *h_dentry;
- struct file *h_file;
-
- /* open all lower dirs */
- dentry = file->f_path.dentry;
- bstart = au_dbstart(dentry);
- for (bindex = au_fbstart(file); bindex < bstart; bindex++)
- au_set_h_fptr(file, bindex, NULL);
- au_set_fbstart(file, bstart);
-
- btail = au_dbtaildir(dentry);
- for (bindex = au_fbend_dir(file); btail < bindex; bindex--)
- au_set_h_fptr(file, bindex, NULL);
- au_set_fbend_dir(file, btail);
-
- flags = vfsub_file_flags(file);
- for (bindex = bstart; bindex <= btail; bindex++) {
- h_dentry = au_h_dptr(dentry, bindex);
- if (!h_dentry)
- continue;
- h_file = au_hf_dir(file, bindex);
- if (h_file)
- continue;
-
- h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
- err = PTR_ERR(h_file);
- if (IS_ERR(h_file))
- goto out; /* close all? */
- au_set_h_fptr(file, bindex, h_file);
- }
- au_update_figen(file);
- /* todo: necessary? */
- /* file->f_ra = h_file->f_ra; */
- err = 0;
-
-out:
- return err;
-}
-
-static int do_open_dir(struct file *file, int flags, struct file *h_file)
-{
- int err;
- aufs_bindex_t bindex, btail;
- struct dentry *dentry, *h_dentry;
- struct vfsmount *mnt;
-
- FiMustWriteLock(file);
- AuDebugOn(h_file);
-
- err = 0;
- mnt = file->f_path.mnt;
- dentry = file->f_path.dentry;
- file->f_version = d_inode(dentry)->i_version;
- bindex = au_dbstart(dentry);
- au_set_fbstart(file, bindex);
- btail = au_dbtaildir(dentry);
- au_set_fbend_dir(file, btail);
- for (; !err && bindex <= btail; bindex++) {
- h_dentry = au_h_dptr(dentry, bindex);
- if (!h_dentry)
- continue;
-
- err = vfsub_test_mntns(mnt, h_dentry->d_sb);
- if (unlikely(err))
- break;
- h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
- if (IS_ERR(h_file)) {
- err = PTR_ERR(h_file);
- break;
- }
- au_set_h_fptr(file, bindex, h_file);
- }
- au_update_figen(file);
- /* todo: necessary? */
- /* file->f_ra = h_file->f_ra; */
- if (!err)
- return 0; /* success */
-
- /* close all */
- for (bindex = au_fbstart(file); bindex <= btail; bindex++)
- au_set_h_fptr(file, bindex, NULL);
- au_set_fbstart(file, -1);
- au_set_fbend_dir(file, -1);
-
- return err;
-}
-
-static int aufs_open_dir(struct inode *inode __maybe_unused,
- struct file *file)
-{
- int err;
- struct super_block *sb;
- struct au_fidir *fidir;
-
- err = -ENOMEM;
- sb = file->f_path.dentry->d_sb;
- si_read_lock(sb, AuLock_FLUSH);
- fidir = au_fidir_alloc(sb);
- if (fidir) {
- struct au_do_open_args args = {
- .open = do_open_dir,
- .fidir = fidir
- };
- err = au_do_open(file, &args);
- if (unlikely(err))
- kfree(fidir);
- }
- si_read_unlock(sb);
- return err;
-}
-
-static int aufs_release_dir(struct inode *inode __maybe_unused,
- struct file *file)
-{
- struct au_vdir *vdir_cache;
- struct au_finfo *finfo;
- struct au_fidir *fidir;
- aufs_bindex_t bindex, bend;
-
- finfo = au_fi(file);
- fidir = finfo->fi_hdir;
- if (fidir) {
- au_sphl_del(&finfo->fi_hlist,
- &au_sbi(file->f_path.dentry->d_sb)->si_files);
- vdir_cache = fidir->fd_vdir_cache; /* lock-free */
- if (vdir_cache)
- au_vdir_free(vdir_cache);
-
- bindex = finfo->fi_btop;
- if (bindex >= 0) {
- /*
- * calls fput() instead of filp_close(),
- * since no dnotify or lock for the lower file.
- */
- bend = fidir->fd_bbot;
- for (; bindex <= bend; bindex++)
- au_set_h_fptr(file, bindex, NULL);
- }
- kfree(fidir);
- finfo->fi_hdir = NULL;
- }
- au_finfo_fin(file);
- return 0;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int au_do_flush_dir(struct file *file, fl_owner_t id)
-{
- int err;
- aufs_bindex_t bindex, bend;
- struct file *h_file;
-
- err = 0;
- bend = au_fbend_dir(file);
- for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
- h_file = au_hf_dir(file, bindex);
- if (h_file)
- err = vfsub_flush(h_file, id);
- }
- return err;
-}
-
-static int aufs_flush_dir(struct file *file, fl_owner_t id)
-{
- return au_do_flush(file, id, au_do_flush_dir);
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
-{
- int err;
- aufs_bindex_t bend, bindex;
- struct inode *inode;
- struct super_block *sb;
-
- err = 0;
- sb = dentry->d_sb;
- inode = d_inode(dentry);
- IMustLock(inode);
- bend = au_dbend(dentry);
- for (bindex = au_dbstart(dentry); !err && bindex <= bend; bindex++) {
- struct path h_path;
-
- if (au_test_ro(sb, bindex, inode))
- continue;
- h_path.dentry = au_h_dptr(dentry, bindex);
- if (!h_path.dentry)
- continue;
-
- h_path.mnt = au_sbr_mnt(sb, bindex);
- err = vfsub_fsync(NULL, &h_path, datasync);
- }
-
- return err;
-}
-
-static int au_do_fsync_dir(struct file *file, int datasync)
-{
- int err;
- aufs_bindex_t bend, bindex;
- struct file *h_file;
- struct super_block *sb;
- struct inode *inode;
-
- err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
- if (unlikely(err))
- goto out;
-
- inode = file_inode(file);
- sb = inode->i_sb;
- bend = au_fbend_dir(file);
- for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
- h_file = au_hf_dir(file, bindex);
- if (!h_file || au_test_ro(sb, bindex, inode))
- continue;
-
- err = vfsub_fsync(h_file, &h_file->f_path, datasync);
- }
-
-out:
- return err;
-}
-
-/*
- * @file may be NULL
- */
-static int aufs_fsync_dir(struct file *file, loff_t start, loff_t end,
- int datasync)
-{
- int err;
- struct dentry *dentry;
- struct inode *inode;
- struct super_block *sb;
- struct mutex *mtx;
-
- err = 0;
- dentry = file->f_path.dentry;
- inode = d_inode(dentry);
- mtx = &inode->i_mutex;
- mutex_lock(mtx);
- sb = dentry->d_sb;
- si_noflush_read_lock(sb);
- if (file)
- err = au_do_fsync_dir(file, datasync);
- else {
- di_write_lock_child(dentry);
- err = au_do_fsync_dir_no_file(dentry, datasync);
- }
- au_cpup_attr_timesizes(inode);
- di_write_unlock(dentry);
- if (file)
- fi_write_unlock(file);
-
- si_read_unlock(sb);
- mutex_unlock(mtx);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int aufs_iterate(struct file *file, struct dir_context *ctx)
-{
- int err;
- struct dentry *dentry;
- struct inode *inode, *h_inode;
- struct super_block *sb;
-
- AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);
-
- dentry = file->f_path.dentry;
- inode = d_inode(dentry);
- IMustLock(inode);
-
- sb = dentry->d_sb;
- si_read_lock(sb, AuLock_FLUSH);
- err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
- if (unlikely(err))
- goto out;
- err = au_alive_dir(dentry);
- if (!err)
- err = au_vdir_init(file);
- di_downgrade_lock(dentry, AuLock_IR);
- if (unlikely(err))
- goto out_unlock;
-
- h_inode = au_h_iptr(inode, au_ibstart(inode));
- if (!au_test_nfsd()) {
- err = au_vdir_fill_de(file, ctx);
- fsstack_copy_attr_atime(inode, h_inode);
- } else {
- /*
- * nfsd filldir may call lookup_one_len(), vfs_getattr(),
- * encode_fh() and others.
- */
- atomic_inc(&h_inode->i_count);
- di_read_unlock(dentry, AuLock_IR);
- si_read_unlock(sb);
- err = au_vdir_fill_de(file, ctx);
- fsstack_copy_attr_atime(inode, h_inode);
- fi_write_unlock(file);
- iput(h_inode);
-
- AuTraceErr(err);
- return err;
- }
-
-out_unlock:
- di_read_unlock(dentry, AuLock_IR);
- fi_write_unlock(file);
-out:
- si_read_unlock(sb);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-#define AuTestEmpty_WHONLY 1
-#define AuTestEmpty_CALLED (1 << 1)
-#define AuTestEmpty_SHWH (1 << 2)
-#define au_ftest_testempty(flags, name) ((flags) & AuTestEmpty_##name)
-#define au_fset_testempty(flags, name) \
- do { (flags) |= AuTestEmpty_##name; } while (0)
-#define au_fclr_testempty(flags, name) \
- do { (flags) &= ~AuTestEmpty_##name; } while (0)
-
-#ifndef CONFIG_AUFS_SHWH
-#undef AuTestEmpty_SHWH
-#define AuTestEmpty_SHWH 0
-#endif
-
-struct test_empty_arg {
- struct dir_context ctx;
- struct au_nhash *whlist;
- unsigned int flags;
- int err;
- aufs_bindex_t bindex;
-};
-
-static int test_empty_cb(struct dir_context *ctx, const char *__name,
- int namelen, loff_t offset __maybe_unused, u64 ino,
- unsigned int d_type)
-{
- struct test_empty_arg *arg = container_of(ctx, struct test_empty_arg,
- ctx);
- char *name = (void *)__name;
-
- arg->err = 0;
- au_fset_testempty(arg->flags, CALLED);
- /* smp_mb(); */
- if (name[0] == '.'
- && (namelen == 1 || (name[1] == '.' && namelen == 2)))
- goto out; /* success */
-
- if (namelen <= AUFS_WH_PFX_LEN
- || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
- if (au_ftest_testempty(arg->flags, WHONLY)
- && !au_nhash_test_known_wh(arg->whlist, name, namelen))
- arg->err = -ENOTEMPTY;
- goto out;
- }
-
- name += AUFS_WH_PFX_LEN;
- namelen -= AUFS_WH_PFX_LEN;
- if (!au_nhash_test_known_wh(arg->whlist, name, namelen))
- arg->err = au_nhash_append_wh
- (arg->whlist, name, namelen, ino, d_type, arg->bindex,
- au_ftest_testempty(arg->flags, SHWH));
-
-out:
- /* smp_mb(); */
- AuTraceErr(arg->err);
- return arg->err;
-}
-
-static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
-{
- int err;
- struct file *h_file;
-
- h_file = au_h_open(dentry, arg->bindex,
- O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
- /*file*/NULL, /*force_wr*/0);
- err = PTR_ERR(h_file);
- if (IS_ERR(h_file))
- goto out;
-
- err = 0;
- if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
- && !file_inode(h_file)->i_nlink)
- goto out_put;
-
- do {
- arg->err = 0;
- au_fclr_testempty(arg->flags, CALLED);
- /* smp_mb(); */
- err = vfsub_iterate_dir(h_file, &arg->ctx);
- if (err >= 0)
- err = arg->err;
- } while (!err && au_ftest_testempty(arg->flags, CALLED));
-
-out_put:
- fput(h_file);
- au_sbr_put(dentry->d_sb, arg->bindex);
-out:
- return err;
-}
-
-struct do_test_empty_args {
- int *errp;
- struct dentry *dentry;
- struct test_empty_arg *arg;
-};
-
-static void call_do_test_empty(void *args)
-{
- struct do_test_empty_args *a = args;
- *a->errp = do_test_empty(a->dentry, a->arg);
-}
-
-static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
-{
- int err, wkq_err;
- struct dentry *h_dentry;
- struct inode *h_inode;
-
- h_dentry = au_h_dptr(dentry, arg->bindex);
- h_inode = d_inode(h_dentry);
- /* todo: i_mode changes anytime? */
- mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
- err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ);
- mutex_unlock(&h_inode->i_mutex);
- if (!err)
- err = do_test_empty(dentry, arg);
- else {
- struct do_test_empty_args args = {
- .errp = &err,
- .dentry = dentry,
- .arg = arg
- };
- unsigned int flags = arg->flags;
-
- wkq_err = au_wkq_wait(call_do_test_empty, &args);
- if (unlikely(wkq_err))
- err = wkq_err;
- arg->flags = flags;
- }
-
- return err;
-}
-
-int au_test_empty_lower(struct dentry *dentry)
-{
- int err;
- unsigned int rdhash;
- aufs_bindex_t bindex, bstart, btail;
- struct au_nhash whlist;
- struct test_empty_arg arg = {
- .ctx = {
- .actor = test_empty_cb
- }
- };
- int (*test_empty)(struct dentry *dentry, struct test_empty_arg *arg);
-
- SiMustAnyLock(dentry->d_sb);
-
- rdhash = au_sbi(dentry->d_sb)->si_rdhash;
- if (!rdhash)
- rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry));
- err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
- if (unlikely(err))
- goto out;
-
- arg.flags = 0;
- arg.whlist = &whlist;
- bstart = au_dbstart(dentry);
- if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
- au_fset_testempty(arg.flags, SHWH);
- test_empty = do_test_empty;
- if (au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1))
- test_empty = sio_test_empty;
- arg.bindex = bstart;
- err = test_empty(dentry, &arg);
- if (unlikely(err))
- goto out_whlist;
-
- au_fset_testempty(arg.flags, WHONLY);
- btail = au_dbtaildir(dentry);
- for (bindex = bstart + 1; !err && bindex <= btail; bindex++) {
- struct dentry *h_dentry;
-
- h_dentry = au_h_dptr(dentry, bindex);
- if (h_dentry && d_is_positive(h_dentry)) {
- arg.bindex = bindex;
- err = test_empty(dentry, &arg);
- }
- }
-
-out_whlist:
- au_nhash_wh_free(&whlist);
-out:
- return err;
-}
-
-int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
-{
- int err;
- struct test_empty_arg arg = {
- .ctx = {
- .actor = test_empty_cb
- }
- };
- aufs_bindex_t bindex, btail;
-
- err = 0;
- arg.whlist = whlist;
- arg.flags = AuTestEmpty_WHONLY;
- if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
- au_fset_testempty(arg.flags, SHWH);
- btail = au_dbtaildir(dentry);
- for (bindex = au_dbstart(dentry); !err && bindex <= btail; bindex++) {
- struct dentry *h_dentry;
-
- h_dentry = au_h_dptr(dentry, bindex);
- if (h_dentry && d_is_positive(h_dentry)) {
- arg.bindex = bindex;
- err = sio_test_empty(dentry, &arg);
- }
- }
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-const struct file_operations aufs_dir_fop = {
- .owner = THIS_MODULE,
- .llseek = default_llseek,
- .read = generic_read_dir,
- .iterate = aufs_iterate,
- .unlocked_ioctl = aufs_ioctl_dir,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = aufs_compat_ioctl_dir,
-#endif
- .open = aufs_open_dir,
- .release = aufs_release_dir,
- .flush = aufs_flush_dir,
- .fsync = aufs_fsync_dir
-};
diff --git a/fs/aufs/dir.h b/fs/aufs/dir.h
deleted file mode 100644
index b0a79d722..000000000
--- a/fs/aufs/dir.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * directory operations
- */
-
-#ifndef __AUFS_DIR_H__
-#define __AUFS_DIR_H__
-
-#ifdef __KERNEL__
-
-#include <linux/fs.h>
-
-/* ---------------------------------------------------------------------- */
-
-/* need to be faster and smaller */
-
-struct au_nhash {
- unsigned int nh_num;
- struct hlist_head *nh_head;
-};
-
-struct au_vdir_destr {
- unsigned char len;
- unsigned char name[0];
-} __packed;
-
-struct au_vdir_dehstr {
- struct hlist_node hash;
- struct au_vdir_destr *str;
-} ____cacheline_aligned_in_smp;
-
-struct au_vdir_de {
- ino_t de_ino;
- unsigned char de_type;
- /* caution: packed */
- struct au_vdir_destr de_str;
-} __packed;
-
-struct au_vdir_wh {
- struct hlist_node wh_hash;
-#ifdef CONFIG_AUFS_SHWH
- ino_t wh_ino;
- aufs_bindex_t wh_bindex;
- unsigned char wh_type;
-#else
- aufs_bindex_t wh_bindex;
-#endif
- /* caution: packed */
- struct au_vdir_destr wh_str;
-} __packed;
-
-union au_vdir_deblk_p {
- unsigned char *deblk;
- struct au_vdir_de *de;
-};
-
-struct au_vdir {
- unsigned char **vd_deblk;
- unsigned long vd_nblk;
- struct {
- unsigned long ul;
- union au_vdir_deblk_p p;
- } vd_last;
-
- unsigned long vd_version;
- unsigned int vd_deblk_sz;
- unsigned long vd_jiffy;
-} ____cacheline_aligned_in_smp;
-
-/* ---------------------------------------------------------------------- */
-
-/* dir.c */
-extern const struct file_operations aufs_dir_fop;
-void au_add_nlink(struct inode *dir, struct inode *h_dir);
-void au_sub_nlink(struct inode *dir, struct inode *h_dir);
-loff_t au_dir_size(struct file *file, struct dentry *dentry);
-void au_dir_ts(struct inode *dir, aufs_bindex_t bsrc);
-int au_test_empty_lower(struct dentry *dentry);
-int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
-
-/* vdir.c */
-unsigned int au_rdhash_est(loff_t sz);
-int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp);
-void au_nhash_wh_free(struct au_nhash *whlist);
-int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
- int limit);
-int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen);
-int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
- unsigned int d_type, aufs_bindex_t bindex,
- unsigned char shwh);
-void au_vdir_free(struct au_vdir *vdir);
-int au_vdir_init(struct file *file);
-int au_vdir_fill_de(struct file *file, struct dir_context *ctx);
-
-/* ioctl.c */
-long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);
-
-#ifdef CONFIG_AUFS_RDU
-/* rdu.c */
-long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
-#ifdef CONFIG_COMPAT
-long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg);
-#endif
-#else
-AuStub(long, au_rdu_ioctl, return -EINVAL, struct file *file,
- unsigned int cmd, unsigned long arg)
-#ifdef CONFIG_COMPAT
-AuStub(long, au_rdu_compat_ioctl, return -EINVAL, struct file *file,
- unsigned int cmd, unsigned long arg)
-#endif
-#endif
-
-#endif /* __KERNEL__ */
-#endif /* __AUFS_DIR_H__ */
diff --git a/fs/aufs/dynop.c b/fs/aufs/dynop.c
deleted file mode 100644
index 53a8b55d8..000000000
--- a/fs/aufs/dynop.c
+++ /dev/null
@@ -1,356 +0,0 @@
-/*
- * Copyright (C) 2010-2016 Junjiro R. Okajima
- */
-
-/*
- * dynamically customizable operations for regular files
- */
-
-#include "aufs.h"
-
-#define DyPrSym(key) AuDbgSym(key->dk_op.dy_hop)
-
-/*
- * How large will these lists be?
- * Usually just a few elements, 20-30 at most for each, I guess.
- */
-static struct au_splhead dynop[AuDyLast];
-
-static struct au_dykey *dy_gfind_get(struct au_splhead *spl, const void *h_op)
-{
- struct au_dykey *key, *tmp;
- struct list_head *head;
-
- key = NULL;
- head = &spl->head;
- rcu_read_lock();
- list_for_each_entry_rcu(tmp, head, dk_list)
- if (tmp->dk_op.dy_hop == h_op) {
- key = tmp;
- kref_get(&key->dk_kref);
- break;
- }
- rcu_read_unlock();
-
- return key;
-}
-
-static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key)
-{
- struct au_dykey **k, *found;
- const void *h_op = key->dk_op.dy_hop;
- int i;
-
- found = NULL;
- k = br->br_dykey;
- for (i = 0; i < AuBrDynOp; i++)
- if (k[i]) {
- if (k[i]->dk_op.dy_hop == h_op) {
- found = k[i];
- break;
- }
- } else
- break;
- if (!found) {
- spin_lock(&br->br_dykey_lock);
- for (; i < AuBrDynOp; i++)
- if (k[i]) {
- if (k[i]->dk_op.dy_hop == h_op) {
- found = k[i];
- break;
- }
- } else {
- k[i] = key;
- break;
- }
- spin_unlock(&br->br_dykey_lock);
- BUG_ON(i == AuBrDynOp); /* expand the array */
- }
-
- return found;
-}
-
-/* kref_get() if @key is already added */
-static struct au_dykey *dy_gadd(struct au_splhead *spl, struct au_dykey *key)
-{
- struct au_dykey *tmp, *found;
- struct list_head *head;
- const void *h_op = key->dk_op.dy_hop;
-
- found = NULL;
- head = &spl->head;
- spin_lock(&spl->spin);
- list_for_each_entry(tmp, head, dk_list)
- if (tmp->dk_op.dy_hop == h_op) {
- kref_get(&tmp->dk_kref);
- found = tmp;
- break;
- }
- if (!found)
- list_add_rcu(&key->dk_list, head);
- spin_unlock(&spl->spin);
-
- if (!found)
- DyPrSym(key);
- return found;
-}
-
-static void dy_free_rcu(struct rcu_head *rcu)
-{
- struct au_dykey *key;
-
- key = container_of(rcu, struct au_dykey, dk_rcu);
- DyPrSym(key);
- kfree(key);
-}
-
-static void dy_free(struct kref *kref)
-{
- struct au_dykey *key;
- struct au_splhead *spl;
-
- key = container_of(kref, struct au_dykey, dk_kref);
- spl = dynop + key->dk_op.dy_type;
- au_spl_del_rcu(&key->dk_list, spl);
- call_rcu(&key->dk_rcu, dy_free_rcu);
-}
-
-void au_dy_put(struct au_dykey *key)
-{
- kref_put(&key->dk_kref, dy_free);
-}
-
-/* ---------------------------------------------------------------------- */
-
-#define DyDbgSize(cnt, op) AuDebugOn(cnt != sizeof(op)/sizeof(void *))
-
-#ifdef CONFIG_AUFS_DEBUG
-#define DyDbgDeclare(cnt) unsigned int cnt = 0
-#define DyDbgInc(cnt) do { cnt++; } while (0)
-#else
-#define DyDbgDeclare(cnt) do {} while (0)
-#define DyDbgInc(cnt) do {} while (0)
-#endif
-
-#define DySet(func, dst, src, h_op, h_sb) do { \
- DyDbgInc(cnt); \
- if (h_op->func) { \
- if (src.func) \
- dst.func = src.func; \
- else \
- AuDbg("%s %s\n", au_sbtype(h_sb), #func); \
- } \
-} while (0)
-
-#define DySetForce(func, dst, src) do { \
- AuDebugOn(!src.func); \
- DyDbgInc(cnt); \
- dst.func = src.func; \
-} while (0)
-
-#define DySetAop(func) \
- DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb)
-#define DySetAopForce(func) \
- DySetForce(func, dyaop->da_op, aufs_aop)
-
-static void dy_aop(struct au_dykey *key, const void *h_op,
- struct super_block *h_sb __maybe_unused)
-{
- struct au_dyaop *dyaop = (void *)key;
- const struct address_space_operations *h_aop = h_op;
- DyDbgDeclare(cnt);
-
- AuDbg("%s\n", au_sbtype(h_sb));
-
- DySetAop(writepage);
- DySetAopForce(readpage); /* force */
- DySetAop(writepages);
- DySetAop(set_page_dirty);
- DySetAop(readpages);
- DySetAop(write_begin);
- DySetAop(write_end);
- DySetAop(bmap);
- DySetAop(invalidatepage);
- DySetAop(releasepage);
- DySetAop(freepage);
- /* this one will be changed according to an aufs mount option */
- DySetAop(direct_IO);
- DySetAop(migratepage);
- DySetAop(launder_page);
- DySetAop(is_partially_uptodate);
- DySetAop(is_dirty_writeback);
- DySetAop(error_remove_page);
- DySetAop(swap_activate);
- DySetAop(swap_deactivate);
-
- DyDbgSize(cnt, *h_aop);
-}
-
-/* ---------------------------------------------------------------------- */
-
-static void dy_bug(struct kref *kref)
-{
- BUG();
-}
-
-static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br)
-{
- struct au_dykey *key, *old;
- struct au_splhead *spl;
- struct op {
- unsigned int sz;
- void (*set)(struct au_dykey *key, const void *h_op,
- struct super_block *h_sb __maybe_unused);
- };
- static const struct op a[] = {
- [AuDy_AOP] = {
- .sz = sizeof(struct au_dyaop),
- .set = dy_aop
- }
- };
- const struct op *p;
-
- spl = dynop + op->dy_type;
- key = dy_gfind_get(spl, op->dy_hop);
- if (key)
- goto out_add; /* success */
-
- p = a + op->dy_type;
- key = kzalloc(p->sz, GFP_NOFS);
- if (unlikely(!key)) {
- key = ERR_PTR(-ENOMEM);
- goto out;
- }
-
- key->dk_op.dy_hop = op->dy_hop;
- kref_init(&key->dk_kref);
- p->set(key, op->dy_hop, au_br_sb(br));
- old = dy_gadd(spl, key);
- if (old) {
- kfree(key);
- key = old;
- }
-
-out_add:
- old = dy_bradd(br, key);
- if (old)
- /* its ref-count should never be zero here */
- kref_put(&key->dk_kref, dy_bug);
-out:
- return key;
-}
-
-/* ---------------------------------------------------------------------- */
-/*
- * Aufs prohibits O_DIRECT by defaut even if the branch supports it.
- * This behaviour is necessary to return an error from open(O_DIRECT) instead
- * of the succeeding I/O. The dio mount option enables O_DIRECT and makes
- * open(O_DIRECT) always succeed, but the succeeding I/O may return an error.
- * See the aufs manual in detail.
- */
-static void dy_adx(struct au_dyaop *dyaop, int do_dx)
-{
- if (!do_dx)
- dyaop->da_op.direct_IO = NULL;
- else
- dyaop->da_op.direct_IO = aufs_aop.direct_IO;
-}
-
-static struct au_dyaop *dy_aget(struct au_branch *br,
- const struct address_space_operations *h_aop,
- int do_dx)
-{
- struct au_dyaop *dyaop;
- struct au_dynop op;
-
- op.dy_type = AuDy_AOP;
- op.dy_haop = h_aop;
- dyaop = (void *)dy_get(&op, br);
- if (IS_ERR(dyaop))
- goto out;
- dy_adx(dyaop, do_dx);
-
-out:
- return dyaop;
-}
-
-int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
- struct inode *h_inode)
-{
- int err, do_dx;
- struct super_block *sb;
- struct au_branch *br;
- struct au_dyaop *dyaop;
-
- AuDebugOn(!S_ISREG(h_inode->i_mode));
- IiMustWriteLock(inode);
-
- sb = inode->i_sb;
- br = au_sbr(sb, bindex);
- do_dx = !!au_opt_test(au_mntflags(sb), DIO);
- dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx);
- err = PTR_ERR(dyaop);
- if (IS_ERR(dyaop))
- /* unnecessary to call dy_fput() */
- goto out;
-
- err = 0;
- inode->i_mapping->a_ops = &dyaop->da_op;
-
-out:
- return err;
-}
-
-/*
- * Is it safe to replace a_ops during the inode/file is in operation?
- * Yes, I hope so.
- */
-int au_dy_irefresh(struct inode *inode)
-{
- int err;
- aufs_bindex_t bstart;
- struct inode *h_inode;
-
- err = 0;
- if (S_ISREG(inode->i_mode)) {
- bstart = au_ibstart(inode);
- h_inode = au_h_iptr(inode, bstart);
- err = au_dy_iaop(inode, bstart, h_inode);
- }
- return err;
-}
-
-void au_dy_arefresh(int do_dx)
-{
- struct au_splhead *spl;
- struct list_head *head;
- struct au_dykey *key;
-
- spl = dynop + AuDy_AOP;
- head = &spl->head;
- spin_lock(&spl->spin);
- list_for_each_entry(key, head, dk_list)
- dy_adx((void *)key, do_dx);
- spin_unlock(&spl->spin);
-}
-
-/* ---------------------------------------------------------------------- */
-
-void __init au_dy_init(void)
-{
- int i;
-
- /* make sure that 'struct au_dykey *' can be any type */
- BUILD_BUG_ON(offsetof(struct au_dyaop, da_key));
-
- for (i = 0; i < AuDyLast; i++)
- au_spl_init(dynop + i);
-}
-
-void au_dy_fin(void)
-{
- int i;
-
- for (i = 0; i < AuDyLast; i++)
- WARN_ON(!list_empty(&dynop[i].head));
-}
diff --git a/fs/aufs/dynop.h b/fs/aufs/dynop.h
deleted file mode 100644
index 8680bfc53..000000000
--- a/fs/aufs/dynop.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2010-2016 Junjiro R. Okajima
- */
-
-/*
- * dynamically customizable operations (for regular files only)
- */
-
-#ifndef __AUFS_DYNOP_H__
-#define __AUFS_DYNOP_H__
-
-#ifdef __KERNEL__
-
-#include <linux/fs.h>
-#include <linux/kref.h>
-
-enum {AuDy_AOP, AuDyLast};
-
-struct au_dynop {
- int dy_type;
- union {
- const void *dy_hop;
- const struct address_space_operations *dy_haop;
- };
-};
-
-struct au_dykey {
- union {
- struct list_head dk_list;
- struct rcu_head dk_rcu;
- };
- struct au_dynop dk_op;
-
- /*
- * during I am in the branch local array, kref is gotten. when the
- * branch is removed, kref is put.
- */
- struct kref dk_kref;
-};
-
-/* stop unioning since their sizes are very different from each other */
-struct au_dyaop {
- struct au_dykey da_key;
- struct address_space_operations da_op; /* not const */
-};
-
-/* ---------------------------------------------------------------------- */
-
-/* dynop.c */
-struct au_branch;
-void au_dy_put(struct au_dykey *key);
-int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
- struct inode *h_inode);
-int au_dy_irefresh(struct inode *inode);
-void au_dy_arefresh(int do_dio);
-
-void __init au_dy_init(void);
-void au_dy_fin(void);
-
-#endif /* __KERNEL__ */
-#endif /* __AUFS_DYNOP_H__ */
diff --git a/fs/aufs/export.c b/fs/aufs/export.c
deleted file mode 100644
index 7f6fec61f..000000000
--- a/fs/aufs/export.c
+++ /dev/null
@@ -1,819 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * export via nfs
- */
-
-#include <linux/exportfs.h>
-#include <linux/fs_struct.h>
-#include <linux/namei.h>
-#include <linux/nsproxy.h>
-#include <linux/random.h>
-#include <linux/writeback.h>
-#include "../fs/mount.h"
-#include "aufs.h"
-
-union conv {
-#ifdef CONFIG_AUFS_INO_T_64
- __u32 a[2];
-#else
- __u32 a[1];
-#endif
- ino_t ino;
-};
-
-static ino_t decode_ino(__u32 *a)
-{
- union conv u;
-
- BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a));
- u.a[0] = a[0];
-#ifdef CONFIG_AUFS_INO_T_64
- u.a[1] = a[1];
-#endif
- return u.ino;
-}
-
-static void encode_ino(__u32 *a, ino_t ino)
-{
- union conv u;
-
- u.ino = ino;
- a[0] = u.a[0];
-#ifdef CONFIG_AUFS_INO_T_64
- a[1] = u.a[1];
-#endif
-}
-
-/* NFS file handle */
-enum {
- Fh_br_id,
- Fh_sigen,
-#ifdef CONFIG_AUFS_INO_T_64
- /* support 64bit inode number */
- Fh_ino1,
- Fh_ino2,
- Fh_dir_ino1,
- Fh_dir_ino2,
-#else
- Fh_ino1,
- Fh_dir_ino1,
-#endif
- Fh_igen,
- Fh_h_type,
- Fh_tail,
-
- Fh_ino = Fh_ino1,
- Fh_dir_ino = Fh_dir_ino1
-};
-
-static int au_test_anon(struct dentry *dentry)
-{
- /* note: read d_flags without d_lock */
- return !!(dentry->d_flags & DCACHE_DISCONNECTED);
-}
-
-int au_test_nfsd(void)
-{
- int ret;
- struct task_struct *tsk = current;
- char comm[sizeof(tsk->comm)];
-
- ret = 0;
- if (tsk->flags & PF_KTHREAD) {
- get_task_comm(comm, tsk);
- ret = !strcmp(comm, "nfsd");
- }
-
- return ret;
-}
-
-/* ---------------------------------------------------------------------- */
-/* inode generation external table */
-
-void au_xigen_inc(struct inode *inode)
-{
- loff_t pos;
- ssize_t sz;
- __u32 igen;
- struct super_block *sb;
- struct au_sbinfo *sbinfo;
-
- sb = inode->i_sb;
- AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
-
- sbinfo = au_sbi(sb);
- pos = inode->i_ino;
- pos *= sizeof(igen);
- igen = inode->i_generation + 1;
- sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen,
- sizeof(igen), &pos);
- if (sz == sizeof(igen))
- return; /* success */
-
- if (unlikely(sz >= 0))
- AuIOErr("xigen error (%zd)\n", sz);
-}
-
-int au_xigen_new(struct inode *inode)
-{
- int err;
- loff_t pos;
- ssize_t sz;
- struct super_block *sb;
- struct au_sbinfo *sbinfo;
- struct file *file;
-
- err = 0;
- /* todo: dirty, at mount time */
- if (inode->i_ino == AUFS_ROOT_INO)
- goto out;
- sb = inode->i_sb;
- SiMustAnyLock(sb);
- if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
- goto out;
-
- err = -EFBIG;
- pos = inode->i_ino;
- if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
- AuIOErr1("too large i%lld\n", pos);
- goto out;
- }
- pos *= sizeof(inode->i_generation);
-
- err = 0;
- sbinfo = au_sbi(sb);
- file = sbinfo->si_xigen;
- BUG_ON(!file);
-
- if (vfsub_f_size_read(file)
- < pos + sizeof(inode->i_generation)) {
- inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
- sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation,
- sizeof(inode->i_generation), &pos);
- } else
- sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation,
- sizeof(inode->i_generation), &pos);
- if (sz == sizeof(inode->i_generation))
- goto out; /* success */
-
- err = sz;
- if (unlikely(sz >= 0)) {
- err = -EIO;
- AuIOErr("xigen error (%zd)\n", sz);
- }
-
-out:
- return err;
-}
-
-int au_xigen_set(struct super_block *sb, struct file *base)
-{
- int err;
- struct au_sbinfo *sbinfo;
- struct file *file;
-
- SiMustWriteLock(sb);
-
- sbinfo = au_sbi(sb);
- file = au_xino_create2(base, sbinfo->si_xigen);
- err = PTR_ERR(file);
- if (IS_ERR(file))
- goto out;
- err = 0;
- if (sbinfo->si_xigen)
- fput(sbinfo->si_xigen);
- sbinfo->si_xigen = file;
-
-out:
- return err;
-}
-
-void au_xigen_clr(struct super_block *sb)
-{
- struct au_sbinfo *sbinfo;
-
- SiMustWriteLock(sb);
-
- sbinfo = au_sbi(sb);
- if (sbinfo->si_xigen) {
- fput(sbinfo->si_xigen);
- sbinfo->si_xigen = NULL;
- }
-}
-
-/* ---------------------------------------------------------------------- */
-
-static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
- ino_t dir_ino)
-{
- struct dentry *dentry, *d;
- struct inode *inode;
- unsigned int sigen;
-
- dentry = NULL;
- inode = ilookup(sb, ino);
- if (!inode)
- goto out;
-
- dentry = ERR_PTR(-ESTALE);
- sigen = au_sigen(sb);
- if (unlikely(is_bad_inode(inode)
- || IS_DEADDIR(inode)
- || sigen != au_iigen(inode, NULL)))
- goto out_iput;
-
- dentry = NULL;
- if (!dir_ino || S_ISDIR(inode->i_mode))
- dentry = d_find_alias(inode);
- else {
- spin_lock(&inode->i_lock);
- hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
- spin_lock(&d->d_lock);
- if (!au_test_anon(d)
- && d_inode(d->d_parent)->i_ino == dir_ino) {
- dentry = dget_dlock(d);
- spin_unlock(&d->d_lock);
- break;
- }
- spin_unlock(&d->d_lock);
- }
- spin_unlock(&inode->i_lock);
- }
- if (unlikely(dentry && au_digen_test(dentry, sigen))) {
- /* need to refresh */
- dput(dentry);
- dentry = NULL;
- }
-
-out_iput:
- iput(inode);
-out:
- AuTraceErrPtr(dentry);
- return dentry;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* todo: dirty? */
-/* if exportfs_decode_fh() passed vfsmount*, we could be happy */
-
-struct au_compare_mnt_args {
- /* input */
- struct super_block *sb;
-
- /* output */
- struct vfsmount *mnt;
-};
-
-static int au_compare_mnt(struct vfsmount *mnt, void *arg)
-{
- struct au_compare_mnt_args *a = arg;
-
- if (mnt->mnt_sb != a->sb)
- return 0;
- a->mnt = mntget(mnt);
- return 1;
-}
-
-static struct vfsmount *au_mnt_get(struct super_block *sb)
-{
- int err;
- struct path root;
- struct au_compare_mnt_args args = {
- .sb = sb
- };
-
- get_fs_root(current->fs, &root);
- rcu_read_lock();
- err = iterate_mounts(au_compare_mnt, &args, root.mnt);
- rcu_read_unlock();
- path_put(&root);
- AuDebugOn(!err);
- AuDebugOn(!args.mnt);
- return args.mnt;
-}
-
-struct au_nfsd_si_lock {
- unsigned int sigen;
- aufs_bindex_t bindex, br_id;
- unsigned char force_lock;
-};
-
-static int si_nfsd_read_lock(struct super_block *sb,
- struct au_nfsd_si_lock *nsi_lock)
-{
- int err;
- aufs_bindex_t bindex;
-
- si_read_lock(sb, AuLock_FLUSH);
-
- /* branch id may be wrapped around */
- err = 0;
- bindex = au_br_index(sb, nsi_lock->br_id);
- if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
- goto out; /* success */
-
- err = -ESTALE;
- bindex = -1;
- if (!nsi_lock->force_lock)
- si_read_unlock(sb);
-
-out:
- nsi_lock->bindex = bindex;
- return err;
-}
-
-struct find_name_by_ino {
- struct dir_context ctx;
- int called, found;
- ino_t ino;
- char *name;
- int namelen;
-};
-
-static int
-find_name_by_ino(struct dir_context *ctx, const char *name, int namelen,
- loff_t offset, u64 ino, unsigned int d_type)
-{
- struct find_name_by_ino *a = container_of(ctx, struct find_name_by_ino,
- ctx);
-
- a->called++;
- if (a->ino != ino)
- return 0;
-
- memcpy(a->name, name, namelen);
- a->namelen = namelen;
- a->found = 1;
- return 1;
-}
-
-static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
- struct au_nfsd_si_lock *nsi_lock)
-{
- struct dentry *dentry, *parent;
- struct file *file;
- struct inode *dir;
- struct find_name_by_ino arg = {
- .ctx = {
- .actor = find_name_by_ino
- }
- };
- int err;
-
- parent = path->dentry;
- if (nsi_lock)
- si_read_unlock(parent->d_sb);
- file = vfsub_dentry_open(path, au_dir_roflags);
- dentry = (void *)file;
- if (IS_ERR(file))
- goto out;
-
- dentry = ERR_PTR(-ENOMEM);
- arg.name = (void *)__get_free_page(GFP_NOFS);
- if (unlikely(!arg.name))
- goto out_file;
- arg.ino = ino;
- arg.found = 0;
- do {
- arg.called = 0;
- /* smp_mb(); */
- err = vfsub_iterate_dir(file, &arg.ctx);
- } while (!err && !arg.found && arg.called);
- dentry = ERR_PTR(err);
- if (unlikely(err))
- goto out_name;
- /* instead of ENOENT */
- dentry = ERR_PTR(-ESTALE);
- if (!arg.found)
- goto out_name;
-
- /* do not call vfsub_lkup_one() */
- dir = d_inode(parent);
- mutex_lock(&dir->i_mutex);
- dentry = vfsub_lookup_one_len(arg.name, parent, arg.namelen);
- mutex_unlock(&dir->i_mutex);
- AuTraceErrPtr(dentry);
- if (IS_ERR(dentry))
- goto out_name;
- AuDebugOn(au_test_anon(dentry));
- if (unlikely(d_really_is_negative(dentry))) {
- dput(dentry);
- dentry = ERR_PTR(-ENOENT);
- }
-
-out_name:
- free_page((unsigned long)arg.name);
-out_file:
- fput(file);
-out:
- if (unlikely(nsi_lock
- && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0))
- if (!IS_ERR(dentry)) {
- dput(dentry);
- dentry = ERR_PTR(-ESTALE);
- }
- AuTraceErrPtr(dentry);
- return dentry;
-}
-
-static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
- ino_t dir_ino,
- struct au_nfsd_si_lock *nsi_lock)
-{
- struct dentry *dentry;
- struct path path;
-
- if (dir_ino != AUFS_ROOT_INO) {
- path.dentry = decode_by_ino(sb, dir_ino, 0);
- dentry = path.dentry;
- if (!path.dentry || IS_ERR(path.dentry))
- goto out;
- AuDebugOn(au_test_anon(path.dentry));
- } else
- path.dentry = dget(sb->s_root);
-
- path.mnt = au_mnt_get(sb);
- dentry = au_lkup_by_ino(&path, ino, nsi_lock);
- path_put(&path);
-
-out:
- AuTraceErrPtr(dentry);
- return dentry;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int h_acceptable(void *expv, struct dentry *dentry)
-{
- return 1;
-}
-
-static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
- char *buf, int len, struct super_block *sb)
-{
- char *p;
- int n;
- struct path path;
-
- p = d_path(h_rootpath, buf, len);
- if (IS_ERR(p))
- goto out;
- n = strlen(p);
-
- path.mnt = h_rootpath->mnt;
- path.dentry = h_parent;
- p = d_path(&path, buf, len);
- if (IS_ERR(p))
- goto out;
- if (n != 1)
- p += n;
-
- path.mnt = au_mnt_get(sb);
- path.dentry = sb->s_root;
- p = d_path(&path, buf, len - strlen(p));
- mntput(path.mnt);
- if (IS_ERR(p))
- goto out;
- if (n != 1)
- p[strlen(p)] = '/';
-
-out:
- AuTraceErrPtr(p);
- return p;
-}
-
-static
-struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh,
- int fh_len, struct au_nfsd_si_lock *nsi_lock)
-{
- struct dentry *dentry, *h_parent, *root;
- struct super_block *h_sb;
- char *pathname, *p;
- struct vfsmount *h_mnt;
- struct au_branch *br;
- int err;
- struct path path;
-
- br = au_sbr(sb, nsi_lock->bindex);
- h_mnt = au_br_mnt(br);
- h_sb = h_mnt->mnt_sb;
- /* todo: call lower fh_to_dentry()? fh_to_parent()? */
- h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
- fh_len - Fh_tail, fh[Fh_h_type],
- h_acceptable, /*context*/NULL);
- dentry = h_parent;
- if (unlikely(!h_parent || IS_ERR(h_parent))) {
- AuWarn1("%s decode_fh failed, %ld\n",
- au_sbtype(h_sb), PTR_ERR(h_parent));
- goto out;
- }
- dentry = NULL;
- if (unlikely(au_test_anon(h_parent))) {
- AuWarn1("%s decode_fh returned a disconnected dentry\n",
- au_sbtype(h_sb));
- goto out_h_parent;
- }
-
- dentry = ERR_PTR(-ENOMEM);
- pathname = (void *)__get_free_page(GFP_NOFS);
- if (unlikely(!pathname))
- goto out_h_parent;
-
- root = sb->s_root;
- path.mnt = h_mnt;
- di_read_lock_parent(root, !AuLock_IR);
- path.dentry = au_h_dptr(root, nsi_lock->bindex);
- di_read_unlock(root, !AuLock_IR);
- p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb);
- dentry = (void *)p;
- if (IS_ERR(p))
- goto out_pathname;
-
- si_read_unlock(sb);
- err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
- dentry = ERR_PTR(err);
- if (unlikely(err))
- goto out_relock;
-
- dentry = ERR_PTR(-ENOENT);
- AuDebugOn(au_test_anon(path.dentry));
- if (unlikely(d_really_is_negative(path.dentry)))
- goto out_path;
-
- if (ino != d_inode(path.dentry)->i_ino)
- dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL);
- else
- dentry = dget(path.dentry);
-
-out_path:
- path_put(&path);
-out_relock:
- if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
- if (!IS_ERR(dentry)) {
- dput(dentry);
- dentry = ERR_PTR(-ESTALE);
- }
-out_pathname:
- free_page((unsigned long)pathname);
-out_h_parent:
- dput(h_parent);
-out:
- AuTraceErrPtr(dentry);
- return dentry;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static struct dentry *
-aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
- int fh_type)
-{
- struct dentry *dentry;
- __u32 *fh = fid->raw;
- struct au_branch *br;
- ino_t ino, dir_ino;
- struct au_nfsd_si_lock nsi_lock = {
- .force_lock = 0
- };
-
- dentry = ERR_PTR(-ESTALE);
- /* it should never happen, but the file handle is unreliable */
- if (unlikely(fh_len < Fh_tail))
- goto out;
- nsi_lock.sigen = fh[Fh_sigen];
- nsi_lock.br_id = fh[Fh_br_id];
-
- /* branch id may be wrapped around */
- br = NULL;
- if (unlikely(si_nfsd_read_lock(sb, &nsi_lock)))
- goto out;
- nsi_lock.force_lock = 1;
-
- /* is this inode still cached? */
- ino = decode_ino(fh + Fh_ino);
- /* it should never happen */
- if (unlikely(ino == AUFS_ROOT_INO))
- goto out_unlock;
-
- dir_ino = decode_ino(fh + Fh_dir_ino);
- dentry = decode_by_ino(sb, ino, dir_ino);
- if (IS_ERR(dentry))
- goto out_unlock;
- if (dentry)
- goto accept;
-
- /* is the parent dir cached? */
- br = au_sbr(sb, nsi_lock.bindex);
- atomic_inc(&br->br_count);
- dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
- if (IS_ERR(dentry))
- goto out_unlock;
- if (dentry)
- goto accept;
-
- /* lookup path */
- dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock);
- if (IS_ERR(dentry))
- goto out_unlock;
- if (unlikely(!dentry))
- /* todo?: make it ESTALE */
- goto out_unlock;
-
-accept:
- if (!au_digen_test(dentry, au_sigen(sb))
- && d_inode(dentry)->i_generation == fh[Fh_igen])
- goto out_unlock; /* success */
-
- dput(dentry);
- dentry = ERR_PTR(-ESTALE);
-out_unlock:
- if (br)
- atomic_dec(&br->br_count);
- si_read_unlock(sb);
-out:
- AuTraceErrPtr(dentry);
- return dentry;
-}
-
-#if 0 /* reserved for future use */
-/* support subtreecheck option */
-static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
- int fh_len, int fh_type)
-{
- struct dentry *parent;
- __u32 *fh = fid->raw;
- ino_t dir_ino;
-
- dir_ino = decode_ino(fh + Fh_dir_ino);
- parent = decode_by_ino(sb, dir_ino, 0);
- if (IS_ERR(parent))
- goto out;
- if (!parent)
- parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
- dir_ino, fh, fh_len);
-
-out:
- AuTraceErrPtr(parent);
- return parent;
-}
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-static int aufs_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
- struct inode *dir)
-{
- int err;
- aufs_bindex_t bindex;
- struct super_block *sb, *h_sb;
- struct dentry *dentry, *parent, *h_parent;
- struct inode *h_dir;
- struct au_branch *br;
-
- err = -ENOSPC;
- if (unlikely(*max_len <= Fh_tail)) {
- AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
- goto out;
- }
-
- err = FILEID_ROOT;
- if (inode->i_ino == AUFS_ROOT_INO) {
- AuDebugOn(inode->i_ino != AUFS_ROOT_INO);
- goto out;
- }
-
- h_parent = NULL;
- sb = inode->i_sb;
- err = si_read_lock(sb, AuLock_FLUSH);
- if (unlikely(err))
- goto out;
-
-#ifdef CONFIG_AUFS_DEBUG
- if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
- AuWarn1("NFS-exporting requires xino\n");
-#endif
- err = -EIO;
- parent = NULL;
- ii_read_lock_child(inode);
- bindex = au_ibstart(inode);
- if (!dir) {
- dentry = d_find_any_alias(inode);
- if (unlikely(!dentry))
- goto out_unlock;
- AuDebugOn(au_test_anon(dentry));
- parent = dget_parent(dentry);
- dput(dentry);
- if (unlikely(!parent))
- goto out_unlock;
- if (d_really_is_positive(parent))
- dir = d_inode(parent);
- }
-
- ii_read_lock_parent(dir);
- h_dir = au_h_iptr(dir, bindex);
- ii_read_unlock(dir);
- if (unlikely(!h_dir))
- goto out_parent;
- h_parent = d_find_any_alias(h_dir);
- if (unlikely(!h_parent))
- goto out_hparent;
-
- err = -EPERM;
- br = au_sbr(sb, bindex);
- h_sb = au_br_sb(br);
- if (unlikely(!h_sb->s_export_op)) {
- AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
- goto out_hparent;
- }
-
- fh[Fh_br_id] = br->br_id;
- fh[Fh_sigen] = au_sigen(sb);
- encode_ino(fh + Fh_ino, inode->i_ino);
- encode_ino(fh + Fh_dir_ino, dir->i_ino);
- fh[Fh_igen] = inode->i_generation;
-
- *max_len -= Fh_tail;
- fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
- max_len,
- /*connectable or subtreecheck*/0);
- err = fh[Fh_h_type];
- *max_len += Fh_tail;
- /* todo: macros? */
- if (err != FILEID_INVALID)
- err = 99;
- else
- AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));
-
-out_hparent:
- dput(h_parent);
-out_parent:
- dput(parent);
-out_unlock:
- ii_read_unlock(inode);
- si_read_unlock(sb);
-out:
- if (unlikely(err < 0))
- err = FILEID_INVALID;
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int aufs_commit_metadata(struct inode *inode)
-{
- int err;
- aufs_bindex_t bindex;
- struct super_block *sb;
- struct inode *h_inode;
- int (*f)(struct inode *inode);
-
- sb = inode->i_sb;
- si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
- ii_write_lock_child(inode);
- bindex = au_ibstart(inode);
- AuDebugOn(bindex < 0);
- h_inode = au_h_iptr(inode, bindex);
-
- f = h_inode->i_sb->s_export_op->commit_metadata;
- if (f)
- err = f(h_inode);
- else {
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = 0 /* metadata only */
- };
-
- err = sync_inode(h_inode, &wbc);
- }
-
- au_cpup_attr_timesizes(inode);
- ii_write_unlock(inode);
- si_read_unlock(sb);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static struct export_operations aufs_export_op = {
- .fh_to_dentry = aufs_fh_to_dentry,
- /* .fh_to_parent = aufs_fh_to_parent, */
- .encode_fh = aufs_encode_fh,
- .commit_metadata = aufs_commit_metadata
-};
-
-void au_export_init(struct super_block *sb)
-{
- struct au_sbinfo *sbinfo;
- __u32 u;
-
- sb->s_export_op = &aufs_export_op;
- sbinfo = au_sbi(sb);
- sbinfo->si_xigen = NULL;
- get_random_bytes(&u, sizeof(u));
- BUILD_BUG_ON(sizeof(u) != sizeof(int));
- atomic_set(&sbinfo->si_xigen_next, u);
-}
diff --git a/fs/aufs/f_op.c b/fs/aufs/f_op.c
deleted file mode 100644
index 145dec870..000000000
--- a/fs/aufs/f_op.c
+++ /dev/null
@@ -1,759 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * file and vm operations
- */
-
-#include <linux/aio.h>
-#include <linux/fs_stack.h>
-#include <linux/mman.h>
-#include <linux/security.h>
-#include "aufs.h"
-
-int au_do_open_nondir(struct file *file, int flags, struct file *h_file)
-{
- int err;
- aufs_bindex_t bindex;
- struct dentry *dentry, *h_dentry;
- struct au_finfo *finfo;
- struct inode *h_inode;
-
- FiMustWriteLock(file);
-
- err = 0;
- dentry = file->f_path.dentry;
- AuDebugOn(IS_ERR_OR_NULL(dentry));
- finfo = au_fi(file);
- memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
- atomic_set(&finfo->fi_mmapped, 0);
- bindex = au_dbstart(dentry);
- if (!h_file) {
- h_dentry = au_h_dptr(dentry, bindex);
- err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
- if (unlikely(err))
- goto out;
- h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
- } else {
- h_dentry = h_file->f_path.dentry;
- err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
- if (unlikely(err))
- goto out;
- get_file(h_file);
- }
- if (IS_ERR(h_file))
- err = PTR_ERR(h_file);
- else {
- if ((flags & __O_TMPFILE)
- && !(flags & O_EXCL)) {
- h_inode = file_inode(h_file);
- spin_lock(&h_inode->i_lock);
- h_inode->i_state |= I_LINKABLE;
- spin_unlock(&h_inode->i_lock);
- }
- au_set_fbstart(file, bindex);
- au_set_h_fptr(file, bindex, h_file);
- au_update_figen(file);
- /* todo: necessary? */
- /* file->f_ra = h_file->f_ra; */
- }
-
-out:
- return err;
-}
-
-static int aufs_open_nondir(struct inode *inode __maybe_unused,
- struct file *file)
-{
- int err;
- struct super_block *sb;
- struct au_do_open_args args = {
- .open = au_do_open_nondir
- };
-
- AuDbg("%pD, f_flags 0x%x, f_mode 0x%x\n",
- file, vfsub_file_flags(file), file->f_mode);
-
- sb = file->f_path.dentry->d_sb;
- si_read_lock(sb, AuLock_FLUSH);
- err = au_do_open(file, &args);
- si_read_unlock(sb);
- return err;
-}
-
-int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
-{
- struct au_finfo *finfo;
- aufs_bindex_t bindex;
-
- finfo = au_fi(file);
- au_sphl_del(&finfo->fi_hlist,
- &au_sbi(file->f_path.dentry->d_sb)->si_files);
- bindex = finfo->fi_btop;
- if (bindex >= 0)
- au_set_h_fptr(file, bindex, NULL);
-
- au_finfo_fin(file);
- return 0;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int au_do_flush_nondir(struct file *file, fl_owner_t id)
-{
- int err;
- struct file *h_file;
-
- err = 0;
- h_file = au_hf_top(file);
- if (h_file)
- err = vfsub_flush(h_file, id);
- return err;
-}
-
-static int aufs_flush_nondir(struct file *file, fl_owner_t id)
-{
- return au_do_flush(file, id, au_do_flush_nondir);
-}
-
-/* ---------------------------------------------------------------------- */
-/*
- * read and write functions acquire [fdi]_rwsem once, but release before
- * mmap_sem. This is because to stop a race condition between mmap(2).
- * Releasing these aufs-rwsem should be safe, no branch-mamagement (by keeping
- * si_rwsem), no harmful copy-up should happen. Actually copy-up may happen in
- * read functions after [fdi]_rwsem are released, but it should be harmless.
- */
-
-/* Callers should call au_read_post() or fput() in the end */
-struct file *au_read_pre(struct file *file, int keep_fi)
-{
- struct file *h_file;
- int err;
-
- err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
- if (!err) {
- di_read_unlock(file->f_path.dentry, AuLock_IR);
- h_file = au_hf_top(file);
- get_file(h_file);
- if (!keep_fi)
- fi_read_unlock(file);
- } else
- h_file = ERR_PTR(err);
-
- return h_file;
-}
-
-static void au_read_post(struct inode *inode, struct file *h_file)
-{
- /* update without lock, I don't think it a problem */
- fsstack_copy_attr_atime(inode, file_inode(h_file));
- fput(h_file);
-}
-
-struct au_write_pre {
- blkcnt_t blks;
- aufs_bindex_t bstart;
-};
-
-/*
- * return with iinfo is write-locked
- * callers should call au_write_post() or iinfo_write_unlock() + fput() in the
- * end
- */
-static struct file *au_write_pre(struct file *file, int do_ready,
- struct au_write_pre *wpre)
-{
- struct file *h_file;
- struct dentry *dentry;
- int err;
- struct au_pin pin;
-
- err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
- h_file = ERR_PTR(err);
- if (unlikely(err))
- goto out;
-
- dentry = file->f_path.dentry;
- if (do_ready) {
- err = au_ready_to_write(file, -1, &pin);
- if (unlikely(err)) {
- h_file = ERR_PTR(err);
- di_write_unlock(dentry);
- goto out_fi;
- }
- }
-
- di_downgrade_lock(dentry, /*flags*/0);
- if (wpre)
- wpre->bstart = au_fbstart(file);
- h_file = au_hf_top(file);
- get_file(h_file);
- if (wpre)
- wpre->blks = file_inode(h_file)->i_blocks;
- if (do_ready)
- au_unpin(&pin);
- di_read_unlock(dentry, /*flags*/0);
-
-out_fi:
- fi_write_unlock(file);
-out:
- return h_file;
-}
-
-static void au_write_post(struct inode *inode, struct file *h_file,
- struct au_write_pre *wpre, ssize_t written)
-{
- struct inode *h_inode;
-
- au_cpup_attr_timesizes(inode);
- AuDebugOn(au_ibstart(inode) != wpre->bstart);
- h_inode = file_inode(h_file);
- inode->i_mode = h_inode->i_mode;
- ii_write_unlock(inode);
- fput(h_file);
-
- /* AuDbg("blks %llu, %llu\n", (u64)blks, (u64)h_inode->i_blocks); */
- if (written > 0)
- au_fhsm_wrote(inode->i_sb, wpre->bstart,
- /*force*/h_inode->i_blocks > wpre->blks);
-}
-
-static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
- loff_t *ppos)
-{
- ssize_t err;
- struct inode *inode;
- struct file *h_file;
- struct super_block *sb;
-
- inode = file_inode(file);
- sb = inode->i_sb;
- si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
-
- h_file = au_read_pre(file, /*keep_fi*/0);
- err = PTR_ERR(h_file);
- if (IS_ERR(h_file))
- goto out;
-
- /* filedata may be obsoleted by concurrent copyup, but no problem */
- err = vfsub_read_u(h_file, buf, count, ppos);
- /* todo: necessary? */
- /* file->f_ra = h_file->f_ra; */
- au_read_post(inode, h_file);
-
-out:
- si_read_unlock(sb);
- return err;
-}
-
-/*
- * todo: very ugly
- * it locks both of i_mutex and si_rwsem for read in safe.
- * if the plink maintenance mode continues forever (that is the problem),
- * may loop forever.
- */
-static void au_mtx_and_read_lock(struct inode *inode)
-{
- int err;
- struct super_block *sb = inode->i_sb;
-
- while (1) {
- mutex_lock(&inode->i_mutex);
- err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
- if (!err)
- break;
- mutex_unlock(&inode->i_mutex);
- si_read_lock(sb, AuLock_NOPLMW);
- si_read_unlock(sb);
- }
-}
-
-static ssize_t aufs_write(struct file *file, const char __user *ubuf,
- size_t count, loff_t *ppos)
-{
- ssize_t err;
- struct au_write_pre wpre;
- struct inode *inode;
- struct file *h_file;
- char __user *buf = (char __user *)ubuf;
-
- inode = file_inode(file);
- au_mtx_and_read_lock(inode);
-
- h_file = au_write_pre(file, /*do_ready*/1, &wpre);
- err = PTR_ERR(h_file);
- if (IS_ERR(h_file))
- goto out;
-
- err = vfsub_write_u(h_file, buf, count, ppos);
- au_write_post(inode, h_file, &wpre, err);
-
-out:
- si_read_unlock(inode->i_sb);
- mutex_unlock(&inode->i_mutex);
- return err;
-}
-
-static ssize_t au_do_iter(struct file *h_file, int rw, struct kiocb *kio,
- struct iov_iter *iov_iter)
-{
- ssize_t err;
- struct file *file;
- ssize_t (*iter)(struct kiocb *, struct iov_iter *);
-
- err = security_file_permission(h_file, rw);
- if (unlikely(err))
- goto out;
-
- err = -ENOSYS;
- iter = NULL;
- if (rw == MAY_READ)
- iter = h_file->f_op->read_iter;
- else if (rw == MAY_WRITE)
- iter = h_file->f_op->write_iter;
-
- file = kio->ki_filp;
- kio->ki_filp = h_file;
- if (iter) {
- lockdep_off();
- err = iter(kio, iov_iter);
- lockdep_on();
- } else
- /* currently there is no such fs */
- WARN_ON_ONCE(1);
- kio->ki_filp = file;
-
-out:
- return err;
-}
-
-static ssize_t aufs_read_iter(struct kiocb *kio, struct iov_iter *iov_iter)
-{
- ssize_t err;
- struct file *file, *h_file;
- struct inode *inode;
- struct super_block *sb;
-
- file = kio->ki_filp;
- inode = file_inode(file);
- sb = inode->i_sb;
- si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
-
- h_file = au_read_pre(file, /*keep_fi*/0);
- err = PTR_ERR(h_file);
- if (IS_ERR(h_file))
- goto out;
-
- err = au_do_iter(h_file, MAY_READ, kio, iov_iter);
- /* todo: necessary? */
- /* file->f_ra = h_file->f_ra; */
- au_read_post(inode, h_file);
-
-out:
- si_read_unlock(sb);
- return err;
-}
-
-static ssize_t aufs_write_iter(struct kiocb *kio, struct iov_iter *iov_iter)
-{
- ssize_t err;
- struct au_write_pre wpre;
- struct inode *inode;
- struct file *file, *h_file;
-
- file = kio->ki_filp;
- inode = file_inode(file);
- au_mtx_and_read_lock(inode);
-
- h_file = au_write_pre(file, /*do_ready*/1, &wpre);
- err = PTR_ERR(h_file);
- if (IS_ERR(h_file))
- goto out;
-
- err = au_do_iter(h_file, MAY_WRITE, kio, iov_iter);
- au_write_post(inode, h_file, &wpre, err);
-
-out:
- si_read_unlock(inode->i_sb);
- mutex_unlock(&inode->i_mutex);
- return err;
-}
-
-static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t len,
- unsigned int flags)
-{
- ssize_t err;
- struct file *h_file;
- struct inode *inode;
- struct super_block *sb;
-
- inode = file_inode(file);
- sb = inode->i_sb;
- si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
-
- h_file = au_read_pre(file, /*keep_fi*/1);
- err = PTR_ERR(h_file);
- if (IS_ERR(h_file))
- goto out;
-
- if (au_test_loopback_kthread()) {
- au_warn_loopback(h_file->f_path.dentry->d_sb);
- if (file->f_mapping != h_file->f_mapping) {
- file->f_mapping = h_file->f_mapping;
- smp_mb(); /* unnecessary? */
- }
- }
- fi_read_unlock(file);
-
- err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
- /* todo: necessasry? */
- /* file->f_ra = h_file->f_ra; */
- au_read_post(inode, h_file);
-
-out:
- si_read_unlock(sb);
- return err;
-}
-
-static ssize_t
-aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
- size_t len, unsigned int flags)
-{
- ssize_t err;
- struct au_write_pre wpre;
- struct inode *inode;
- struct file *h_file;
-
- inode = file_inode(file);
- au_mtx_and_read_lock(inode);
-
- h_file = au_write_pre(file, /*do_ready*/1, &wpre);
- err = PTR_ERR(h_file);
- if (IS_ERR(h_file))
- goto out;
-
- err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
- au_write_post(inode, h_file, &wpre, err);
-
-out:
- si_read_unlock(inode->i_sb);
- mutex_unlock(&inode->i_mutex);
- return err;
-}
-
-static long aufs_fallocate(struct file *file, int mode, loff_t offset,
- loff_t len)
-{
- long err;
- struct au_write_pre wpre;
- struct inode *inode;
- struct file *h_file;
-
- inode = file_inode(file);
- au_mtx_and_read_lock(inode);
-
- h_file = au_write_pre(file, /*do_ready*/1, &wpre);
- err = PTR_ERR(h_file);
- if (IS_ERR(h_file))
- goto out;
-
- lockdep_off();
- err = vfs_fallocate(h_file, mode, offset, len);
- lockdep_on();
- au_write_post(inode, h_file, &wpre, /*written*/1);
-
-out:
- si_read_unlock(inode->i_sb);
- mutex_unlock(&inode->i_mutex);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * The locking order around current->mmap_sem.
- * - in most and regular cases
- * file I/O syscall -- aufs_read() or something
- * -- si_rwsem for read -- mmap_sem
- * (Note that [fdi]i_rwsem are released before mmap_sem).
- * - in mmap case
- * mmap(2) -- mmap_sem -- aufs_mmap() -- si_rwsem for read -- [fdi]i_rwsem
- * This AB-BA order is definitly bad, but is not a problem since "si_rwsem for
- * read" allows muliple processes to acquire it and [fdi]i_rwsem are not held in
- * file I/O. Aufs needs to stop lockdep in aufs_mmap() though.
- * It means that when aufs acquires si_rwsem for write, the process should never
- * acquire mmap_sem.
- *
- * Actually aufs_iterate() holds [fdi]i_rwsem before mmap_sem, but this is not a
- * problem either since any directory is not able to be mmap-ed.
- * The similar scenario is applied to aufs_readlink() too.
- */
-
-#if 0 /* stop calling security_file_mmap() */
-/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */
-#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b)
-
-static unsigned long au_arch_prot_conv(unsigned long flags)
-{
- /* currently ppc64 only */
-#ifdef CONFIG_PPC64
- /* cf. linux/arch/powerpc/include/asm/mman.h */
- AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO);
- return AuConv_VM_PROT(flags, SAO);
-#else
- AuDebugOn(arch_calc_vm_prot_bits(-1));
- return 0;
-#endif
-}
-
-static unsigned long au_prot_conv(unsigned long flags)
-{
- return AuConv_VM_PROT(flags, READ)
- | AuConv_VM_PROT(flags, WRITE)
- | AuConv_VM_PROT(flags, EXEC)
- | au_arch_prot_conv(flags);
-}
-
-/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */
-#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b)
-
-static unsigned long au_flag_conv(unsigned long flags)
-{
- return AuConv_VM_MAP(flags, GROWSDOWN)
- | AuConv_VM_MAP(flags, DENYWRITE)
- | AuConv_VM_MAP(flags, LOCKED);
-}
-#endif
-
-static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
-{
- int err;
- const unsigned char wlock
- = (file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
- struct super_block *sb;
- struct file *h_file;
- struct inode *inode;
-
- AuDbgVmRegion(file, vma);
-
- inode = file_inode(file);
- sb = inode->i_sb;
- lockdep_off();
- si_read_lock(sb, AuLock_NOPLMW);
-
- h_file = au_write_pre(file, wlock, /*wpre*/NULL);
- lockdep_on();
- err = PTR_ERR(h_file);
- if (IS_ERR(h_file))
- goto out;
-
- err = 0;
- au_set_mmapped(file);
- au_vm_file_reset(vma, h_file);
- /*
- * we cannot call security_mmap_file() here since it may acquire
- * mmap_sem or i_mutex.
- *
- * err = security_mmap_file(h_file, au_prot_conv(vma->vm_flags),
- * au_flag_conv(vma->vm_flags));
- */
- if (!err)
- err = h_file->f_op->mmap(h_file, vma);
- if (!err) {
- au_vm_prfile_set(vma, file);
- fsstack_copy_attr_atime(inode, file_inode(h_file));
- goto out_fput; /* success */
- }
- au_unset_mmapped(file);
- au_vm_file_reset(vma, file);
-
-out_fput:
- lockdep_off();
- ii_write_unlock(inode);
- lockdep_on();
- fput(h_file);
-out:
- lockdep_off();
- si_read_unlock(sb);
- lockdep_on();
- AuTraceErr(err);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end,
- int datasync)
-{
- int err;
- struct au_write_pre wpre;
- struct inode *inode;
- struct file *h_file;
-
- err = 0; /* -EBADF; */ /* posix? */
- if (unlikely(!(file->f_mode & FMODE_WRITE)))
- goto out;
-
- inode = file_inode(file);
- au_mtx_and_read_lock(inode);
-
- h_file = au_write_pre(file, /*do_ready*/1, &wpre);
- err = PTR_ERR(h_file);
- if (IS_ERR(h_file))
- goto out_unlock;
-
- err = vfsub_fsync(h_file, &h_file->f_path, datasync);
- au_write_post(inode, h_file, &wpre, /*written*/0);
-
-out_unlock:
- si_read_unlock(inode->i_sb);
- mutex_unlock(&inode->i_mutex);
-out:
- return err;
-}
-
-/* no one supports this operation, currently */
-#if 0
-static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync)
-{
- int err;
- struct au_write_pre wpre;
- struct inode *inode;
- struct file *file, *h_file;
-
- err = 0; /* -EBADF; */ /* posix? */
- if (unlikely(!(file->f_mode & FMODE_WRITE)))
- goto out;
-
- file = kio->ki_filp;
- inode = file_inode(file);
- au_mtx_and_read_lock(inode);
-
- h_file = au_write_pre(file, /*do_ready*/1, &wpre);
- err = PTR_ERR(h_file);
- if (IS_ERR(h_file))
- goto out_unlock;
-
- err = -ENOSYS;
- h_file = au_hf_top(file);
- if (h_file->f_op->aio_fsync) {
- struct mutex *h_mtx;
-
- h_mtx = &file_inode(h_file)->i_mutex;
- if (!is_sync_kiocb(kio)) {
- get_file(h_file);
- fput(file);
- }
- kio->ki_filp = h_file;
- err = h_file->f_op->aio_fsync(kio, datasync);
- mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
- if (!err)
- vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
- /*ignore*/
- mutex_unlock(h_mtx);
- }
- au_write_post(inode, h_file, &wpre, /*written*/0);
-
-out_unlock:
- si_read_unlock(inode->sb);
- mutex_unlock(&inode->i_mutex);
-out:
- return err;
-}
-#endif
-
-static int aufs_fasync(int fd, struct file *file, int flag)
-{
- int err;
- struct file *h_file;
- struct super_block *sb;
-
- sb = file->f_path.dentry->d_sb;
- si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
-
- h_file = au_read_pre(file, /*keep_fi*/0);
- err = PTR_ERR(h_file);
- if (IS_ERR(h_file))
- goto out;
-
- if (h_file->f_op->fasync)
- err = h_file->f_op->fasync(fd, h_file, flag);
- fput(h_file); /* instead of au_read_post() */
-
-out:
- si_read_unlock(sb);
- return err;
-}
-
-static int aufs_setfl(struct file *file, unsigned long arg)
-{
- int err;
- struct file *h_file;
- struct super_block *sb;
-
- sb = file->f_path.dentry->d_sb;
- si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
-
- h_file = au_read_pre(file, /*keep_fi*/0);
- err = PTR_ERR(h_file);
- if (IS_ERR(h_file))
- goto out;
-
- arg |= vfsub_file_flags(file) & FASYNC; /* stop calling h_file->fasync */
- err = setfl(/*unused fd*/-1, h_file, arg);
- fput(h_file); /* instead of au_read_post() */
-
-out:
- si_read_unlock(sb);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* no one supports this operation, currently */
-#if 0
-static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
- size_t len, loff_t *pos, int more)
-{
-}
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-const struct file_operations aufs_file_fop = {
- .owner = THIS_MODULE,
-
- .llseek = default_llseek,
-
- .read = aufs_read,
- .write = aufs_write,
- .read_iter = aufs_read_iter,
- .write_iter = aufs_write_iter,
-
-#ifdef CONFIG_AUFS_POLL
- .poll = aufs_poll,
-#endif
- .unlocked_ioctl = aufs_ioctl_nondir,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = aufs_compat_ioctl_nondir,
-#endif
- .mmap = aufs_mmap,
- .open = aufs_open_nondir,
- .flush = aufs_flush_nondir,
- .release = aufs_release_nondir,
- .fsync = aufs_fsync_nondir,
- /* .aio_fsync = aufs_aio_fsync_nondir, */
- .fasync = aufs_fasync,
- /* .sendpage = aufs_sendpage, */
- .setfl = aufs_setfl,
- .splice_write = aufs_splice_write,
- .splice_read = aufs_splice_read,
-#if 0
- .aio_splice_write = aufs_aio_splice_write,
- .aio_splice_read = aufs_aio_splice_read,
-#endif
- .fallocate = aufs_fallocate
-};
diff --git a/fs/aufs/fhsm.c b/fs/aufs/fhsm.c
deleted file mode 100644
index db079d6ee..000000000
--- a/fs/aufs/fhsm.c
+++ /dev/null
@@ -1,412 +0,0 @@
-/*
- * Copyright (C) 2011-2016 Junjiro R. Okajima
- */
-
-/*
- * File-based Hierarchy Storage Management
- */
-
-#include <linux/anon_inodes.h>
-#include <linux/poll.h>
-#include <linux/seq_file.h>
-#include <linux/statfs.h>
-#include "aufs.h"
-
-static aufs_bindex_t au_fhsm_bottom(struct super_block *sb)
-{
- struct au_sbinfo *sbinfo;
- struct au_fhsm *fhsm;
-
- SiMustAnyLock(sb);
-
- sbinfo = au_sbi(sb);
- fhsm = &sbinfo->si_fhsm;
- AuDebugOn(!fhsm);
- return fhsm->fhsm_bottom;
-}
-
-void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex)
-{
- struct au_sbinfo *sbinfo;
- struct au_fhsm *fhsm;
-
- SiMustWriteLock(sb);
-
- sbinfo = au_sbi(sb);
- fhsm = &sbinfo->si_fhsm;
- AuDebugOn(!fhsm);
- fhsm->fhsm_bottom = bindex;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int au_fhsm_test_jiffy(struct au_sbinfo *sbinfo, struct au_branch *br)
-{
- struct au_br_fhsm *bf;
-
- bf = br->br_fhsm;
- MtxMustLock(&bf->bf_lock);
-
- return !bf->bf_readable
- || time_after(jiffies,
- bf->bf_jiffy + sbinfo->si_fhsm.fhsm_expire);
-}
-
-/* ---------------------------------------------------------------------- */
-
-static void au_fhsm_notify(struct super_block *sb, int val)
-{
- struct au_sbinfo *sbinfo;
- struct au_fhsm *fhsm;
-
- SiMustAnyLock(sb);
-
- sbinfo = au_sbi(sb);
- fhsm = &sbinfo->si_fhsm;
- if (au_fhsm_pid(fhsm)
- && atomic_read(&fhsm->fhsm_readable) != -1) {
- atomic_set(&fhsm->fhsm_readable, val);
- if (val)
- wake_up(&fhsm->fhsm_wqh);
- }
-}
-
-static int au_fhsm_stfs(struct super_block *sb, aufs_bindex_t bindex,
- struct aufs_stfs *rstfs, int do_lock, int do_notify)
-{
- int err;
- struct au_branch *br;
- struct au_br_fhsm *bf;
-
- br = au_sbr(sb, bindex);
- AuDebugOn(au_br_rdonly(br));
- bf = br->br_fhsm;
- AuDebugOn(!bf);
-
- if (do_lock)
- mutex_lock(&bf->bf_lock);
- else
- MtxMustLock(&bf->bf_lock);
-
- /* sb->s_root for NFS is unreliable */
- err = au_br_stfs(br, &bf->bf_stfs);
- if (unlikely(err)) {
- AuErr1("FHSM failed (%d), b%d, ignored.\n", bindex, err);
- goto out;
- }
-
- bf->bf_jiffy = jiffies;
- bf->bf_readable = 1;
- if (do_notify)
- au_fhsm_notify(sb, /*val*/1);
- if (rstfs)
- *rstfs = bf->bf_stfs;
-
-out:
- if (do_lock)
- mutex_unlock(&bf->bf_lock);
- au_fhsm_notify(sb, /*val*/1);
-
- return err;
-}
-
-void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force)
-{
- int err;
- struct au_sbinfo *sbinfo;
- struct au_fhsm *fhsm;
- struct au_branch *br;
- struct au_br_fhsm *bf;
-
- AuDbg("b%d, force %d\n", bindex, force);
- SiMustAnyLock(sb);
-
- sbinfo = au_sbi(sb);
- fhsm = &sbinfo->si_fhsm;
- if (!au_ftest_si(sbinfo, FHSM)
- || fhsm->fhsm_bottom == bindex)
- return;
-
- br = au_sbr(sb, bindex);
- bf = br->br_fhsm;
- AuDebugOn(!bf);
- mutex_lock(&bf->bf_lock);
- if (force
- || au_fhsm_pid(fhsm)
- || au_fhsm_test_jiffy(sbinfo, br))
- err = au_fhsm_stfs(sb, bindex, /*rstfs*/NULL, /*do_lock*/0,
- /*do_notify*/1);
- mutex_unlock(&bf->bf_lock);
-}
-
-void au_fhsm_wrote_all(struct super_block *sb, int force)
-{
- aufs_bindex_t bindex, bend;
- struct au_branch *br;
-
- /* exclude the bottom */
- bend = au_fhsm_bottom(sb);
- for (bindex = 0; bindex < bend; bindex++) {
- br = au_sbr(sb, bindex);
- if (au_br_fhsm(br->br_perm))
- au_fhsm_wrote(sb, bindex, force);
- }
-}
-
-/* ---------------------------------------------------------------------- */
-
-static unsigned int au_fhsm_poll(struct file *file,
- struct poll_table_struct *wait)
-{
- unsigned int mask;
- struct au_sbinfo *sbinfo;
- struct au_fhsm *fhsm;
-
- mask = 0;
- sbinfo = file->private_data;
- fhsm = &sbinfo->si_fhsm;
- poll_wait(file, &fhsm->fhsm_wqh, wait);
- if (atomic_read(&fhsm->fhsm_readable))
- mask = POLLIN /* | POLLRDNORM */;
-
- AuTraceErr((int)mask);
- return mask;
-}
-
-static int au_fhsm_do_read_one(struct aufs_stbr __user *stbr,
- struct aufs_stfs *stfs, __s16 brid)
-{
- int err;
-
- err = copy_to_user(&stbr->stfs, stfs, sizeof(*stfs));
- if (!err)
- err = __put_user(brid, &stbr->brid);
- if (unlikely(err))
- err = -EFAULT;
-
- return err;
-}
-
-static ssize_t au_fhsm_do_read(struct super_block *sb,
- struct aufs_stbr __user *stbr, size_t count)
-{
- ssize_t err;
- int nstbr;
- aufs_bindex_t bindex, bend;
- struct au_branch *br;
- struct au_br_fhsm *bf;
-
- /* except the bottom branch */
- err = 0;
- nstbr = 0;
- bend = au_fhsm_bottom(sb);
- for (bindex = 0; !err && bindex < bend; bindex++) {
- br = au_sbr(sb, bindex);
- if (!au_br_fhsm(br->br_perm))
- continue;
-
- bf = br->br_fhsm;
- mutex_lock(&bf->bf_lock);
- if (bf->bf_readable) {
- err = -EFAULT;
- if (count >= sizeof(*stbr))
- err = au_fhsm_do_read_one(stbr++, &bf->bf_stfs,
- br->br_id);
- if (!err) {
- bf->bf_readable = 0;
- count -= sizeof(*stbr);
- nstbr++;
- }
- }
- mutex_unlock(&bf->bf_lock);
- }
- if (!err)
- err = sizeof(*stbr) * nstbr;
-
- return err;
-}
-
-static ssize_t au_fhsm_read(struct file *file, char __user *buf, size_t count,
- loff_t *pos)
-{
- ssize_t err;
- int readable;
- aufs_bindex_t nfhsm, bindex, bend;
- struct au_sbinfo *sbinfo;
- struct au_fhsm *fhsm;
- struct au_branch *br;
- struct super_block *sb;
-
- err = 0;
- sbinfo = file->private_data;
- fhsm = &sbinfo->si_fhsm;
-need_data:
- spin_lock_irq(&fhsm->fhsm_wqh.lock);
- if (!atomic_read(&fhsm->fhsm_readable)) {
- if (vfsub_file_flags(file) & O_NONBLOCK)
- err = -EAGAIN;
- else
- err = wait_event_interruptible_locked_irq
- (fhsm->fhsm_wqh,
- atomic_read(&fhsm->fhsm_readable));
- }
- spin_unlock_irq(&fhsm->fhsm_wqh.lock);
- if (unlikely(err))
- goto out;
-
- /* sb may already be dead */
- au_rw_read_lock(&sbinfo->si_rwsem);
- readable = atomic_read(&fhsm->fhsm_readable);
- if (readable > 0) {
- sb = sbinfo->si_sb;
- AuDebugOn(!sb);
- /* exclude the bottom branch */
- nfhsm = 0;
- bend = au_fhsm_bottom(sb);
- for (bindex = 0; bindex < bend; bindex++) {
- br = au_sbr(sb, bindex);
- if (au_br_fhsm(br->br_perm))
- nfhsm++;
- }
- err = -EMSGSIZE;
- if (nfhsm * sizeof(struct aufs_stbr) <= count) {
- atomic_set(&fhsm->fhsm_readable, 0);
- err = au_fhsm_do_read(sbinfo->si_sb, (void __user *)buf,
- count);
- }
- }
- au_rw_read_unlock(&sbinfo->si_rwsem);
- if (!readable)
- goto need_data;
-
-out:
- return err;
-}
-
-static int au_fhsm_release(struct inode *inode, struct file *file)
-{
- struct au_sbinfo *sbinfo;
- struct au_fhsm *fhsm;
-
- /* sb may already be dead */
- sbinfo = file->private_data;
- fhsm = &sbinfo->si_fhsm;
- spin_lock(&fhsm->fhsm_spin);
- fhsm->fhsm_pid = 0;
- spin_unlock(&fhsm->fhsm_spin);
- kobject_put(&sbinfo->si_kobj);
-
- return 0;
-}
-
-static const struct file_operations au_fhsm_fops = {
- .owner = THIS_MODULE,
- .llseek = noop_llseek,
- .read = au_fhsm_read,
- .poll = au_fhsm_poll,
- .release = au_fhsm_release
-};
-
-int au_fhsm_fd(struct super_block *sb, int oflags)
-{
- int err, fd;
- struct au_sbinfo *sbinfo;
- struct au_fhsm *fhsm;
-
- err = -EPERM;
- if (unlikely(!capable(CAP_SYS_ADMIN)))
- goto out;
-
- err = -EINVAL;
- if (unlikely(oflags & ~(O_CLOEXEC | O_NONBLOCK)))
- goto out;
-
- err = 0;
- sbinfo = au_sbi(sb);
- fhsm = &sbinfo->si_fhsm;
- spin_lock(&fhsm->fhsm_spin);
- if (!fhsm->fhsm_pid)
- fhsm->fhsm_pid = current->pid;
- else
- err = -EBUSY;
- spin_unlock(&fhsm->fhsm_spin);
- if (unlikely(err))
- goto out;
-
- oflags |= O_RDONLY;
- /* oflags |= FMODE_NONOTIFY; */
- fd = anon_inode_getfd("[aufs_fhsm]", &au_fhsm_fops, sbinfo, oflags);
- err = fd;
- if (unlikely(fd < 0))
- goto out_pid;
-
- /* succeed reglardless 'fhsm' status */
- kobject_get(&sbinfo->si_kobj);
- si_noflush_read_lock(sb);
- if (au_ftest_si(sbinfo, FHSM))
- au_fhsm_wrote_all(sb, /*force*/0);
- si_read_unlock(sb);
- goto out; /* success */
-
-out_pid:
- spin_lock(&fhsm->fhsm_spin);
- fhsm->fhsm_pid = 0;
- spin_unlock(&fhsm->fhsm_spin);
-out:
- AuTraceErr(err);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int au_fhsm_br_alloc(struct au_branch *br)
-{
- int err;
-
- err = 0;
- br->br_fhsm = kmalloc(sizeof(*br->br_fhsm), GFP_NOFS);
- if (br->br_fhsm)
- au_br_fhsm_init(br->br_fhsm);
- else
- err = -ENOMEM;
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void au_fhsm_fin(struct super_block *sb)
-{
- au_fhsm_notify(sb, /*val*/-1);
-}
-
-void au_fhsm_init(struct au_sbinfo *sbinfo)
-{
- struct au_fhsm *fhsm;
-
- fhsm = &sbinfo->si_fhsm;
- spin_lock_init(&fhsm->fhsm_spin);
- init_waitqueue_head(&fhsm->fhsm_wqh);
- atomic_set(&fhsm->fhsm_readable, 0);
- fhsm->fhsm_expire
- = msecs_to_jiffies(AUFS_FHSM_CACHE_DEF_SEC * MSEC_PER_SEC);
- fhsm->fhsm_bottom = -1;
-}
-
-void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec)
-{
- sbinfo->si_fhsm.fhsm_expire
- = msecs_to_jiffies(sec * MSEC_PER_SEC);
-}
-
-void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo)
-{
- unsigned int u;
-
- if (!au_ftest_si(sbinfo, FHSM))
- return;
-
- u = jiffies_to_msecs(sbinfo->si_fhsm.fhsm_expire) / MSEC_PER_SEC;
- if (u != AUFS_FHSM_CACHE_DEF_SEC)
- seq_printf(seq, ",fhsm_sec=%u", u);
-}
diff --git a/fs/aufs/file.c b/fs/aufs/file.c
deleted file mode 100644
index 6b8a66b4a..000000000
--- a/fs/aufs/file.c
+++ /dev/null
@@ -1,831 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * handling file/dir, and address_space operation
- */
-
-#ifdef CONFIG_AUFS_DEBUG
-#include <linux/migrate.h>
-#endif
-#include <linux/pagemap.h>
-#include "aufs.h"
-
-/* drop flags for writing */
-unsigned int au_file_roflags(unsigned int flags)
-{
- flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC);
- flags |= O_RDONLY | O_NOATIME;
- return flags;
-}
-
-/* common functions to regular file and dir */
-struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
- struct file *file, int force_wr)
-{
- struct file *h_file;
- struct dentry *h_dentry;
- struct inode *h_inode;
- struct super_block *sb;
- struct au_branch *br;
- struct path h_path;
- int err;
-
- /* a race condition can happen between open and unlink/rmdir */
- h_file = ERR_PTR(-ENOENT);
- h_dentry = au_h_dptr(dentry, bindex);
- if (au_test_nfsd() && (!h_dentry || d_is_negative(h_dentry)))
- goto out;
- h_inode = d_inode(h_dentry);
- spin_lock(&h_dentry->d_lock);
- err = (!d_unhashed(dentry) && d_unlinked(h_dentry))
- /* || !d_inode(dentry)->i_nlink */
- ;
- spin_unlock(&h_dentry->d_lock);
- if (unlikely(err))
- goto out;
-
- sb = dentry->d_sb;
- br = au_sbr(sb, bindex);
- err = au_br_test_oflag(flags, br);
- h_file = ERR_PTR(err);
- if (unlikely(err))
- goto out;
-
- /* drop flags for writing */
- if (au_test_ro(sb, bindex, d_inode(dentry))) {
- if (force_wr && !(flags & O_WRONLY))
- force_wr = 0;
- flags = au_file_roflags(flags);
- if (force_wr) {
- h_file = ERR_PTR(-EROFS);
- flags = au_file_roflags(flags);
- if (unlikely(vfsub_native_ro(h_inode)
- || IS_APPEND(h_inode)))
- goto out;
- flags &= ~O_ACCMODE;
- flags |= O_WRONLY;
- }
- }
- flags &= ~O_CREAT;
- atomic_inc(&br->br_count);
- h_path.dentry = h_dentry;
- h_path.mnt = au_br_mnt(br);
- h_file = vfsub_dentry_open(&h_path, flags);
- if (IS_ERR(h_file))
- goto out_br;
-
- if (flags & __FMODE_EXEC) {
- err = deny_write_access(h_file);
- if (unlikely(err)) {
- fput(h_file);
- h_file = ERR_PTR(err);
- goto out_br;
- }
- }
- fsnotify_open(h_file);
- goto out; /* success */
-
-out_br:
- atomic_dec(&br->br_count);
-out:
- return h_file;
-}
-
-static int au_cmoo(struct dentry *dentry)
-{
- int err, cmoo;
- unsigned int udba;
- struct path h_path;
- struct au_pin pin;
- struct au_cp_generic cpg = {
- .dentry = dentry,
- .bdst = -1,
- .bsrc = -1,
- .len = -1,
- .pin = &pin,
- .flags = AuCpup_DTIME | AuCpup_HOPEN
- };
- struct inode *delegated;
- struct super_block *sb;
- struct au_sbinfo *sbinfo;
- struct au_fhsm *fhsm;
- pid_t pid;
- struct au_branch *br;
- struct dentry *parent;
- struct au_hinode *hdir;
-
- DiMustWriteLock(dentry);
- IiMustWriteLock(d_inode(dentry));
-
- err = 0;
- if (IS_ROOT(dentry))
- goto out;
- cpg.bsrc = au_dbstart(dentry);
- if (!cpg.bsrc)
- goto out;
-
- sb = dentry->d_sb;
- sbinfo = au_sbi(sb);
- fhsm = &sbinfo->si_fhsm;
- pid = au_fhsm_pid(fhsm);
- if (pid
- && (current->pid == pid
- || current->real_parent->pid == pid))
- goto out;
-
- br = au_sbr(sb, cpg.bsrc);
- cmoo = au_br_cmoo(br->br_perm);
- if (!cmoo)
- goto out;
- if (!d_is_reg(dentry))
- cmoo &= AuBrAttr_COO_ALL;
- if (!cmoo)
- goto out;
-
- parent = dget_parent(dentry);
- di_write_lock_parent(parent);
- err = au_wbr_do_copyup_bu(dentry, cpg.bsrc - 1);
- cpg.bdst = err;
- if (unlikely(err < 0)) {
- err = 0; /* there is no upper writable branch */
- goto out_dgrade;
- }
- AuDbg("bsrc %d, bdst %d\n", cpg.bsrc, cpg.bdst);
-
- /* do not respect the coo attrib for the target branch */
- err = au_cpup_dirs(dentry, cpg.bdst);
- if (unlikely(err))
- goto out_dgrade;
-
- di_downgrade_lock(parent, AuLock_IR);
- udba = au_opt_udba(sb);
- err = au_pin(&pin, dentry, cpg.bdst, udba,
- AuPin_DI_LOCKED | AuPin_MNT_WRITE);
- if (unlikely(err))
- goto out_parent;
-
- err = au_sio_cpup_simple(&cpg);
- au_unpin(&pin);
- if (unlikely(err))
- goto out_parent;
- if (!(cmoo & AuBrWAttr_MOO))
- goto out_parent; /* success */
-
- err = au_pin(&pin, dentry, cpg.bsrc, udba,
- AuPin_DI_LOCKED | AuPin_MNT_WRITE);
- if (unlikely(err))
- goto out_parent;
-
- h_path.mnt = au_br_mnt(br);
- h_path.dentry = au_h_dptr(dentry, cpg.bsrc);
- hdir = au_hi(d_inode(parent), cpg.bsrc);
- delegated = NULL;
- err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated, /*force*/1);
- au_unpin(&pin);
- /* todo: keep h_dentry or not? */
- if (unlikely(err == -EWOULDBLOCK)) {
- pr_warn("cannot retry for NFSv4 delegation"
- " for an internal unlink\n");
- iput(delegated);
- }
- if (unlikely(err)) {
- pr_err("unlink %pd after coo failed (%d), ignored\n",
- dentry, err);
- err = 0;
- }
- goto out_parent; /* success */
-
-out_dgrade:
- di_downgrade_lock(parent, AuLock_IR);
-out_parent:
- di_read_unlock(parent, AuLock_IR);
- dput(parent);
-out:
- AuTraceErr(err);
- return err;
-}
-
-int au_do_open(struct file *file, struct au_do_open_args *args)
-{
- int err, no_lock = args->no_lock;
- struct dentry *dentry;
- struct au_finfo *finfo;
-
- if (!no_lock)
- err = au_finfo_init(file, args->fidir);
- else {
- lockdep_off();
- err = au_finfo_init(file, args->fidir);
- lockdep_on();
- }
- if (unlikely(err))
- goto out;
-
- dentry = file->f_path.dentry;
- AuDebugOn(IS_ERR_OR_NULL(dentry));
- if (!no_lock) {
- di_write_lock_child(dentry);
- err = au_cmoo(dentry);
- di_downgrade_lock(dentry, AuLock_IR);
- if (!err)
- err = args->open(file, vfsub_file_flags(file), NULL);
- di_read_unlock(dentry, AuLock_IR);
- } else {
- err = au_cmoo(dentry);
- if (!err)
- err = args->open(file, vfsub_file_flags(file),
- args->h_file);
- if (!err && au_fbstart(file) != au_dbstart(dentry))
- /*
- * cmoo happens after h_file was opened.
- * need to refresh file later.
- */
- atomic_dec(&au_fi(file)->fi_generation);
- }
-
- finfo = au_fi(file);
- if (!err) {
- finfo->fi_file = file;
- au_sphl_add(&finfo->fi_hlist,
- &au_sbi(file->f_path.dentry->d_sb)->si_files);
- }
- if (!no_lock)
- fi_write_unlock(file);
- else {
- lockdep_off();
- fi_write_unlock(file);
- lockdep_on();
- }
- if (unlikely(err)) {
- finfo->fi_hdir = NULL;
- au_finfo_fin(file);
- }
-
-out:
- return err;
-}
-
-int au_reopen_nondir(struct file *file)
-{
- int err;
- aufs_bindex_t bstart;
- struct dentry *dentry;
- struct file *h_file, *h_file_tmp;
-
- dentry = file->f_path.dentry;
- bstart = au_dbstart(dentry);
- h_file_tmp = NULL;
- if (au_fbstart(file) == bstart) {
- h_file = au_hf_top(file);
- if (file->f_mode == h_file->f_mode)
- return 0; /* success */
- h_file_tmp = h_file;
- get_file(h_file_tmp);
- au_set_h_fptr(file, bstart, NULL);
- }
- AuDebugOn(au_fi(file)->fi_hdir);
- /*
- * it can happen
- * file exists on both of rw and ro
- * open --> dbstart and fbstart are both 0
- * prepend a branch as rw, "rw" become ro
- * remove rw/file
- * delete the top branch, "rw" becomes rw again
- * --> dbstart is 1, fbstart is still 0
- * write --> fbstart is 0 but dbstart is 1
- */
- /* AuDebugOn(au_fbstart(file) < bstart); */
-
- h_file = au_h_open(dentry, bstart, vfsub_file_flags(file) & ~O_TRUNC,
- file, /*force_wr*/0);
- err = PTR_ERR(h_file);
- if (IS_ERR(h_file)) {
- if (h_file_tmp) {
- atomic_inc(&au_sbr(dentry->d_sb, bstart)->br_count);
- au_set_h_fptr(file, bstart, h_file_tmp);
- h_file_tmp = NULL;
- }
- goto out; /* todo: close all? */
- }
-
- err = 0;
- au_set_fbstart(file, bstart);
- au_set_h_fptr(file, bstart, h_file);
- au_update_figen(file);
- /* todo: necessary? */
- /* file->f_ra = h_file->f_ra; */
-
-out:
- if (h_file_tmp)
- fput(h_file_tmp);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
- struct dentry *hi_wh)
-{
- int err;
- aufs_bindex_t bstart;
- struct au_dinfo *dinfo;
- struct dentry *h_dentry;
- struct au_hdentry *hdp;
-
- dinfo = au_di(file->f_path.dentry);
- AuRwMustWriteLock(&dinfo->di_rwsem);
-
- bstart = dinfo->di_bstart;
- dinfo->di_bstart = btgt;
- hdp = dinfo->di_hdentry;
- h_dentry = hdp[0 + btgt].hd_dentry;
- hdp[0 + btgt].hd_dentry = hi_wh;
- err = au_reopen_nondir(file);
- hdp[0 + btgt].hd_dentry = h_dentry;
- dinfo->di_bstart = bstart;
-
- return err;
-}
-
-static int au_ready_to_write_wh(struct file *file, loff_t len,
- aufs_bindex_t bcpup, struct au_pin *pin)
-{
- int err;
- struct inode *inode, *h_inode;
- struct dentry *h_dentry, *hi_wh;
- struct au_cp_generic cpg = {
- .dentry = file->f_path.dentry,
- .bdst = bcpup,
- .bsrc = -1,
- .len = len,
- .pin = pin
- };
-
- au_update_dbstart(cpg.dentry);
- inode = d_inode(cpg.dentry);
- h_inode = NULL;
- if (au_dbstart(cpg.dentry) <= bcpup
- && au_dbend(cpg.dentry) >= bcpup) {
- h_dentry = au_h_dptr(cpg.dentry, bcpup);
- if (h_dentry && d_is_positive(h_dentry))
- h_inode = d_inode(h_dentry);
- }
- hi_wh = au_hi_wh(inode, bcpup);
- if (!hi_wh && !h_inode)
- err = au_sio_cpup_wh(&cpg, file);
- else
- /* already copied-up after unlink */
- err = au_reopen_wh(file, bcpup, hi_wh);
-
- if (!err
- && (inode->i_nlink > 1
- || (inode->i_state & I_LINKABLE))
- && au_opt_test(au_mntflags(cpg.dentry->d_sb), PLINK))
- au_plink_append(inode, bcpup, au_h_dptr(cpg.dentry, bcpup));
-
- return err;
-}
-
-/*
- * prepare the @file for writing.
- */
-int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
-{
- int err;
- aufs_bindex_t dbstart;
- struct dentry *parent;
- struct inode *inode;
- struct super_block *sb;
- struct file *h_file;
- struct au_cp_generic cpg = {
- .dentry = file->f_path.dentry,
- .bdst = -1,
- .bsrc = -1,
- .len = len,
- .pin = pin,
- .flags = AuCpup_DTIME
- };
-
- sb = cpg.dentry->d_sb;
- inode = d_inode(cpg.dentry);
- cpg.bsrc = au_fbstart(file);
- err = au_test_ro(sb, cpg.bsrc, inode);
- if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) {
- err = au_pin(pin, cpg.dentry, cpg.bsrc, AuOpt_UDBA_NONE,
- /*flags*/0);
- goto out;
- }
-
- /* need to cpup or reopen */
- parent = dget_parent(cpg.dentry);
- di_write_lock_parent(parent);
- err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
- cpg.bdst = err;
- if (unlikely(err < 0))
- goto out_dgrade;
- err = 0;
-
- if (!d_unhashed(cpg.dentry) && !au_h_dptr(parent, cpg.bdst)) {
- err = au_cpup_dirs(cpg.dentry, cpg.bdst);
- if (unlikely(err))
- goto out_dgrade;
- }
-
- err = au_pin(pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
- AuPin_DI_LOCKED | AuPin_MNT_WRITE);
- if (unlikely(err))
- goto out_dgrade;
-
- dbstart = au_dbstart(cpg.dentry);
- if (dbstart <= cpg.bdst)
- cpg.bsrc = cpg.bdst;
-
- if (dbstart <= cpg.bdst /* just reopen */
- || !d_unhashed(cpg.dentry) /* copyup and reopen */
- ) {
- h_file = au_h_open_pre(cpg.dentry, cpg.bsrc, /*force_wr*/0);
- if (IS_ERR(h_file))
- err = PTR_ERR(h_file);
- else {
- di_downgrade_lock(parent, AuLock_IR);
- if (dbstart > cpg.bdst)
- err = au_sio_cpup_simple(&cpg);
- if (!err)
- err = au_reopen_nondir(file);
- au_h_open_post(cpg.dentry, cpg.bsrc, h_file);
- }
- } else { /* copyup as wh and reopen */
- /*
- * since writable hfsplus branch is not supported,
- * h_open_pre/post() are unnecessary.
- */
- err = au_ready_to_write_wh(file, len, cpg.bdst, pin);
- di_downgrade_lock(parent, AuLock_IR);
- }
-
- if (!err) {
- au_pin_set_parent_lflag(pin, /*lflag*/0);
- goto out_dput; /* success */
- }
- au_unpin(pin);
- goto out_unlock;
-
-out_dgrade:
- di_downgrade_lock(parent, AuLock_IR);
-out_unlock:
- di_read_unlock(parent, AuLock_IR);
-out_dput:
- dput(parent);
-out:
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int au_do_flush(struct file *file, fl_owner_t id,
- int (*flush)(struct file *file, fl_owner_t id))
-{
- int err;
- struct super_block *sb;
- struct inode *inode;
-
- inode = file_inode(file);
- sb = inode->i_sb;
- si_noflush_read_lock(sb);
- fi_read_lock(file);
- ii_read_lock_child(inode);
-
- err = flush(file, id);
- au_cpup_attr_timesizes(inode);
-
- ii_read_unlock(inode);
- fi_read_unlock(file);
- si_read_unlock(sb);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
-{
- int err;
- struct au_pin pin;
- struct au_finfo *finfo;
- struct dentry *parent, *hi_wh;
- struct inode *inode;
- struct super_block *sb;
- struct au_cp_generic cpg = {
- .dentry = file->f_path.dentry,
- .bdst = -1,
- .bsrc = -1,
- .len = -1,
- .pin = &pin,
- .flags = AuCpup_DTIME
- };
-
- FiMustWriteLock(file);
-
- err = 0;
- finfo = au_fi(file);
- sb = cpg.dentry->d_sb;
- inode = d_inode(cpg.dentry);
- cpg.bdst = au_ibstart(inode);
- if (cpg.bdst == finfo->fi_btop || IS_ROOT(cpg.dentry))
- goto out;
-
- parent = dget_parent(cpg.dentry);
- if (au_test_ro(sb, cpg.bdst, inode)) {
- di_read_lock_parent(parent, !AuLock_IR);
- err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
- cpg.bdst = err;
- di_read_unlock(parent, !AuLock_IR);
- if (unlikely(err < 0))
- goto out_parent;
- err = 0;
- }
-
- di_read_lock_parent(parent, AuLock_IR);
- hi_wh = au_hi_wh(inode, cpg.bdst);
- if (!S_ISDIR(inode->i_mode)
- && au_opt_test(au_mntflags(sb), PLINK)
- && au_plink_test(inode)
- && !d_unhashed(cpg.dentry)
- && cpg.bdst < au_dbstart(cpg.dentry)) {
- err = au_test_and_cpup_dirs(cpg.dentry, cpg.bdst);
- if (unlikely(err))
- goto out_unlock;
-
- /* always superio. */
- err = au_pin(&pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
- AuPin_DI_LOCKED | AuPin_MNT_WRITE);
- if (!err) {
- err = au_sio_cpup_simple(&cpg);
- au_unpin(&pin);
- }
- } else if (hi_wh) {
- /* already copied-up after unlink */
- err = au_reopen_wh(file, cpg.bdst, hi_wh);
- *need_reopen = 0;
- }
-
-out_unlock:
- di_read_unlock(parent, AuLock_IR);
-out_parent:
- dput(parent);
-out:
- return err;
-}
-
-static void au_do_refresh_dir(struct file *file)
-{
- aufs_bindex_t bindex, bend, new_bindex, brid;
- struct au_hfile *p, tmp, *q;
- struct au_finfo *finfo;
- struct super_block *sb;
- struct au_fidir *fidir;
-
- FiMustWriteLock(file);
-
- sb = file->f_path.dentry->d_sb;
- finfo = au_fi(file);
- fidir = finfo->fi_hdir;
- AuDebugOn(!fidir);
- p = fidir->fd_hfile + finfo->fi_btop;
- brid = p->hf_br->br_id;
- bend = fidir->fd_bbot;
- for (bindex = finfo->fi_btop; bindex <= bend; bindex++, p++) {
- if (!p->hf_file)
- continue;
-
- new_bindex = au_br_index(sb, p->hf_br->br_id);
- if (new_bindex == bindex)
- continue;
- if (new_bindex < 0) {
- au_set_h_fptr(file, bindex, NULL);
- continue;
- }
-
- /* swap two lower inode, and loop again */
- q = fidir->fd_hfile + new_bindex;
- tmp = *q;
- *q = *p;
- *p = tmp;
- if (tmp.hf_file) {
- bindex--;
- p--;
- }
- }
-
- p = fidir->fd_hfile;
- if (!au_test_mmapped(file) && !d_unlinked(file->f_path.dentry)) {
- bend = au_sbend(sb);
- for (finfo->fi_btop = 0; finfo->fi_btop <= bend;
- finfo->fi_btop++, p++)
- if (p->hf_file) {
- if (file_inode(p->hf_file))
- break;
- au_hfput(p, file);
- }
- } else {
- bend = au_br_index(sb, brid);
- for (finfo->fi_btop = 0; finfo->fi_btop < bend;
- finfo->fi_btop++, p++)
- if (p->hf_file)
- au_hfput(p, file);
- bend = au_sbend(sb);
- }
-
- p = fidir->fd_hfile + bend;
- for (fidir->fd_bbot = bend; fidir->fd_bbot >= finfo->fi_btop;
- fidir->fd_bbot--, p--)
- if (p->hf_file) {
- if (file_inode(p->hf_file))
- break;
- au_hfput(p, file);
- }
- AuDebugOn(fidir->fd_bbot < finfo->fi_btop);
-}
-
-/*
- * after branch manipulating, refresh the file.
- */
-static int refresh_file(struct file *file, int (*reopen)(struct file *file))
-{
- int err, need_reopen;
- aufs_bindex_t bend, bindex;
- struct dentry *dentry;
- struct au_finfo *finfo;
- struct au_hfile *hfile;
-
- dentry = file->f_path.dentry;
- finfo = au_fi(file);
- if (!finfo->fi_hdir) {
- hfile = &finfo->fi_htop;
- AuDebugOn(!hfile->hf_file);
- bindex = au_br_index(dentry->d_sb, hfile->hf_br->br_id);
- AuDebugOn(bindex < 0);
- if (bindex != finfo->fi_btop)
- au_set_fbstart(file, bindex);
- } else {
- err = au_fidir_realloc(finfo, au_sbend(dentry->d_sb) + 1);
- if (unlikely(err))
- goto out;
- au_do_refresh_dir(file);
- }
-
- err = 0;
- need_reopen = 1;
- if (!au_test_mmapped(file))
- err = au_file_refresh_by_inode(file, &need_reopen);
- if (!err && need_reopen && !d_unlinked(dentry))
- err = reopen(file);
- if (!err) {
- au_update_figen(file);
- goto out; /* success */
- }
-
- /* error, close all lower files */
- if (finfo->fi_hdir) {
- bend = au_fbend_dir(file);
- for (bindex = au_fbstart(file); bindex <= bend; bindex++)
- au_set_h_fptr(file, bindex, NULL);
- }
-
-out:
- return err;
-}
-
-/* common function to regular file and dir */
-int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
- int wlock)
-{
- int err;
- unsigned int sigen, figen;
- aufs_bindex_t bstart;
- unsigned char pseudo_link;
- struct dentry *dentry;
- struct inode *inode;
-
- err = 0;
- dentry = file->f_path.dentry;
- inode = d_inode(dentry);
- sigen = au_sigen(dentry->d_sb);
- fi_write_lock(file);
- figen = au_figen(file);
- di_write_lock_child(dentry);
- bstart = au_dbstart(dentry);
- pseudo_link = (bstart != au_ibstart(inode));
- if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart) {
- if (!wlock) {
- di_downgrade_lock(dentry, AuLock_IR);
- fi_downgrade_lock(file);
- }
- goto out; /* success */
- }
-
- AuDbg("sigen %d, figen %d\n", sigen, figen);
- if (au_digen_test(dentry, sigen)) {
- err = au_reval_dpath(dentry, sigen);
- AuDebugOn(!err && au_digen_test(dentry, sigen));
- }
-
- if (!err)
- err = refresh_file(file, reopen);
- if (!err) {
- if (!wlock) {
- di_downgrade_lock(dentry, AuLock_IR);
- fi_downgrade_lock(file);
- }
- } else {
- di_write_unlock(dentry);
- fi_write_unlock(file);
- }
-
-out:
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* cf. aufs_nopage() */
-/* for madvise(2) */
-static int aufs_readpage(struct file *file __maybe_unused, struct page *page)
-{
- unlock_page(page);
- return 0;
-}
-
-/* it will never be called, but necessary to support O_DIRECT */
-static ssize_t aufs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
- loff_t offset)
-{ BUG(); return 0; }
-
-/* they will never be called. */
-#ifdef CONFIG_AUFS_DEBUG
-static int aufs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata)
-{ AuUnsupport(); return 0; }
-static int aufs_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
-{ AuUnsupport(); return 0; }
-static int aufs_writepage(struct page *page, struct writeback_control *wbc)
-{ AuUnsupport(); return 0; }
-
-static int aufs_set_page_dirty(struct page *page)
-{ AuUnsupport(); return 0; }
-static void aufs_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length)
-{ AuUnsupport(); }
-static int aufs_releasepage(struct page *page, gfp_t gfp)
-{ AuUnsupport(); return 0; }
-#if 0 /* called by memory compaction regardless file */
-static int aufs_migratepage(struct address_space *mapping, struct page *newpage,
- struct page *page, enum migrate_mode mode)
-{ AuUnsupport(); return 0; }
-#endif
-static int aufs_launder_page(struct page *page)
-{ AuUnsupport(); return 0; }
-static int aufs_is_partially_uptodate(struct page *page,
- unsigned long from,
- unsigned long count)
-{ AuUnsupport(); return 0; }
-static void aufs_is_dirty_writeback(struct page *page, bool *dirty,
- bool *writeback)
-{ AuUnsupport(); }
-static int aufs_error_remove_page(struct address_space *mapping,
- struct page *page)
-{ AuUnsupport(); return 0; }
-static int aufs_swap_activate(struct swap_info_struct *sis, struct file *file,
- sector_t *span)
-{ AuUnsupport(); return 0; }
-static void aufs_swap_deactivate(struct file *file)
-{ AuUnsupport(); }
-#endif /* CONFIG_AUFS_DEBUG */
-
-const struct address_space_operations aufs_aop = {
- .readpage = aufs_readpage,
- .direct_IO = aufs_direct_IO,
-#ifdef CONFIG_AUFS_DEBUG
- .writepage = aufs_writepage,
- /* no writepages, because of writepage */
- .set_page_dirty = aufs_set_page_dirty,
- /* no readpages, because of readpage */
- .write_begin = aufs_write_begin,
- .write_end = aufs_write_end,
- /* no bmap, no block device */
- .invalidatepage = aufs_invalidatepage,
- .releasepage = aufs_releasepage,
- /* is fallback_migrate_page ok? */
- /* .migratepage = aufs_migratepage, */
- .launder_page = aufs_launder_page,
- .is_partially_uptodate = aufs_is_partially_uptodate,
- .is_dirty_writeback = aufs_is_dirty_writeback,
- .error_remove_page = aufs_error_remove_page,
- .swap_activate = aufs_swap_activate,
- .swap_deactivate = aufs_swap_deactivate
-#endif /* CONFIG_AUFS_DEBUG */
-};
diff --git a/fs/aufs/file.h b/fs/aufs/file.h
deleted file mode 100644
index 27d802487..000000000
--- a/fs/aufs/file.h
+++ /dev/null
@@ -1,278 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * file operations
- */
-
-#ifndef __AUFS_FILE_H__
-#define __AUFS_FILE_H__
-
-#ifdef __KERNEL__
-
-#include <linux/file.h>
-#include <linux/fs.h>
-#include <linux/poll.h>
-#include "rwsem.h"
-
-struct au_branch;
-struct au_hfile {
- struct file *hf_file;
- struct au_branch *hf_br;
-};
-
-struct au_vdir;
-struct au_fidir {
- aufs_bindex_t fd_bbot;
- aufs_bindex_t fd_nent;
- struct au_vdir *fd_vdir_cache;
- struct au_hfile fd_hfile[];
-};
-
-static inline int au_fidir_sz(int nent)
-{
- AuDebugOn(nent < 0);
- return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent;
-}
-
-struct au_finfo {
- atomic_t fi_generation;
-
- struct au_rwsem fi_rwsem;
- aufs_bindex_t fi_btop;
-
- /* do not union them */
- struct { /* for non-dir */
- struct au_hfile fi_htop;
- atomic_t fi_mmapped;
- };
- struct au_fidir *fi_hdir; /* for dir only */
-
- struct hlist_node fi_hlist;
- struct file *fi_file; /* very ugly */
-} ____cacheline_aligned_in_smp;
-
-/* ---------------------------------------------------------------------- */
-
-/* file.c */
-extern const struct address_space_operations aufs_aop;
-unsigned int au_file_roflags(unsigned int flags);
-struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
- struct file *file, int force_wr);
-struct au_do_open_args {
- int no_lock;
- int (*open)(struct file *file, int flags,
- struct file *h_file);
- struct au_fidir *fidir;
- struct file *h_file;
-};
-int au_do_open(struct file *file, struct au_do_open_args *args);
-int au_reopen_nondir(struct file *file);
-struct au_pin;
-int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
-int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
- int wlock);
-int au_do_flush(struct file *file, fl_owner_t id,
- int (*flush)(struct file *file, fl_owner_t id));
-
-/* poll.c */
-#ifdef CONFIG_AUFS_POLL
-unsigned int aufs_poll(struct file *file, poll_table *wait);
-#endif
-
-#ifdef CONFIG_AUFS_BR_HFSPLUS
-/* hfsplus.c */
-struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
- int force_wr);
-void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
- struct file *h_file);
-#else
-AuStub(struct file *, au_h_open_pre, return NULL, struct dentry *dentry,
- aufs_bindex_t bindex, int force_wr)
-AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex,
- struct file *h_file);
-#endif
-
-/* f_op.c */
-extern const struct file_operations aufs_file_fop;
-int au_do_open_nondir(struct file *file, int flags, struct file *h_file);
-int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file);
-struct file *au_read_pre(struct file *file, int keep_fi);
-
-/* finfo.c */
-void au_hfput(struct au_hfile *hf, struct file *file);
-void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
- struct file *h_file);
-
-void au_update_figen(struct file *file);
-struct au_fidir *au_fidir_alloc(struct super_block *sb);
-int au_fidir_realloc(struct au_finfo *finfo, int nbr);
-
-void au_fi_init_once(void *_fi);
-void au_finfo_fin(struct file *file);
-int au_finfo_init(struct file *file, struct au_fidir *fidir);
-
-/* ioctl.c */
-long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg);
-#ifdef CONFIG_COMPAT
-long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
- unsigned long arg);
-long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
- unsigned long arg);
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-static inline struct au_finfo *au_fi(struct file *file)
-{
- return file->private_data;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * fi_read_lock, fi_write_lock,
- * fi_read_unlock, fi_write_unlock, fi_downgrade_lock
- */
-AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem);
-
-#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
-#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
-#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
-
-/* ---------------------------------------------------------------------- */
-
-/* todo: hard/soft set? */
-static inline aufs_bindex_t au_fbstart(struct file *file)
-{
- FiMustAnyLock(file);
- return au_fi(file)->fi_btop;
-}
-
-static inline aufs_bindex_t au_fbend_dir(struct file *file)
-{
- FiMustAnyLock(file);
- AuDebugOn(!au_fi(file)->fi_hdir);
- return au_fi(file)->fi_hdir->fd_bbot;
-}
-
-static inline struct au_vdir *au_fvdir_cache(struct file *file)
-{
- FiMustAnyLock(file);
- AuDebugOn(!au_fi(file)->fi_hdir);
- return au_fi(file)->fi_hdir->fd_vdir_cache;
-}
-
-static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex)
-{
- FiMustWriteLock(file);
- au_fi(file)->fi_btop = bindex;
-}
-
-static inline void au_set_fbend_dir(struct file *file, aufs_bindex_t bindex)
-{
- FiMustWriteLock(file);
- AuDebugOn(!au_fi(file)->fi_hdir);
- au_fi(file)->fi_hdir->fd_bbot = bindex;
-}
-
-static inline void au_set_fvdir_cache(struct file *file,
- struct au_vdir *vdir_cache)
-{
- FiMustWriteLock(file);
- AuDebugOn(!au_fi(file)->fi_hdir);
- au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache;
-}
-
-static inline struct file *au_hf_top(struct file *file)
-{
- FiMustAnyLock(file);
- AuDebugOn(au_fi(file)->fi_hdir);
- return au_fi(file)->fi_htop.hf_file;
-}
-
-static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex)
-{
- FiMustAnyLock(file);
- AuDebugOn(!au_fi(file)->fi_hdir);
- return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file;
-}
-
-/* todo: memory barrier? */
-static inline unsigned int au_figen(struct file *f)
-{
- return atomic_read(&au_fi(f)->fi_generation);
-}
-
-static inline void au_set_mmapped(struct file *f)
-{
- if (atomic_inc_return(&au_fi(f)->fi_mmapped))
- return;
- pr_warn("fi_mmapped wrapped around\n");
- while (!atomic_inc_return(&au_fi(f)->fi_mmapped))
- ;
-}
-
-static inline void au_unset_mmapped(struct file *f)
-{
- atomic_dec(&au_fi(f)->fi_mmapped);
-}
-
-static inline int au_test_mmapped(struct file *f)
-{
- return atomic_read(&au_fi(f)->fi_mmapped);
-}
-
-/* customize vma->vm_file */
-
-static inline void au_do_vm_file_reset(struct vm_area_struct *vma,
- struct file *file)
-{
- struct file *f;
-
- f = vma->vm_file;
- get_file(file);
- vma->vm_file = file;
- fput(f);
-}
-
-#ifdef CONFIG_MMU
-#define AuDbgVmRegion(file, vma) do {} while (0)
-
-static inline void au_vm_file_reset(struct vm_area_struct *vma,
- struct file *file)
-{
- au_do_vm_file_reset(vma, file);
-}
-#else
-#define AuDbgVmRegion(file, vma) \
- AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file))
-
-static inline void au_vm_file_reset(struct vm_area_struct *vma,
- struct file *file)
-{
- struct file *f;
-
- au_do_vm_file_reset(vma, file);
- f = vma->vm_region->vm_file;
- get_file(file);
- vma->vm_region->vm_file = file;
- fput(f);
-}
-#endif /* CONFIG_MMU */
-
-/* handle vma->vm_prfile */
-static inline void au_vm_prfile_set(struct vm_area_struct *vma,
- struct file *file)
-{
- get_file(file);
- vma->vm_prfile = file;
-#ifndef CONFIG_MMU
- get_file(file);
- vma->vm_region->vm_prfile = file;
-#endif
-}
-
-#endif /* __KERNEL__ */
-#endif /* __AUFS_FILE_H__ */
diff --git a/fs/aufs/finfo.c b/fs/aufs/finfo.c
deleted file mode 100644
index b5eb55dfb..000000000
--- a/fs/aufs/finfo.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * file private data
- */
-
-#include "aufs.h"
-
-void au_hfput(struct au_hfile *hf, struct file *file)
-{
- /* todo: direct access f_flags */
- if (vfsub_file_flags(file) & __FMODE_EXEC)
- allow_write_access(hf->hf_file);
- fput(hf->hf_file);
- hf->hf_file = NULL;
- atomic_dec(&hf->hf_br->br_count);
- hf->hf_br = NULL;
-}
-
-void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
-{
- struct au_finfo *finfo = au_fi(file);
- struct au_hfile *hf;
- struct au_fidir *fidir;
-
- fidir = finfo->fi_hdir;
- if (!fidir) {
- AuDebugOn(finfo->fi_btop != bindex);
- hf = &finfo->fi_htop;
- } else
- hf = fidir->fd_hfile + bindex;
-
- if (hf && hf->hf_file)
- au_hfput(hf, file);
- if (val) {
- FiMustWriteLock(file);
- AuDebugOn(IS_ERR_OR_NULL(file->f_path.dentry));
- hf->hf_file = val;
- hf->hf_br = au_sbr(file->f_path.dentry->d_sb, bindex);
- }
-}
-
-void au_update_figen(struct file *file)
-{
- atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_path.dentry));
- /* smp_mb(); */ /* atomic_set */
-}
-
-/* ---------------------------------------------------------------------- */
-
-struct au_fidir *au_fidir_alloc(struct super_block *sb)
-{
- struct au_fidir *fidir;
- int nbr;
-
- nbr = au_sbend(sb) + 1;
- if (nbr < 2)
- nbr = 2; /* initial allocate for 2 branches */
- fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS);
- if (fidir) {
- fidir->fd_bbot = -1;
- fidir->fd_nent = nbr;
- }
-
- return fidir;
-}
-
-int au_fidir_realloc(struct au_finfo *finfo, int nbr)
-{
- int err;
- struct au_fidir *fidir, *p;
-
- AuRwMustWriteLock(&finfo->fi_rwsem);
- fidir = finfo->fi_hdir;
- AuDebugOn(!fidir);
-
- err = -ENOMEM;
- p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr),
- GFP_NOFS);
- if (p) {
- p->fd_nent = nbr;
- finfo->fi_hdir = p;
- err = 0;
- }
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void au_finfo_fin(struct file *file)
-{
- struct au_finfo *finfo;
-
- au_nfiles_dec(file->f_path.dentry->d_sb);
-
- finfo = au_fi(file);
- AuDebugOn(finfo->fi_hdir);
- AuRwDestroy(&finfo->fi_rwsem);
- au_cache_free_finfo(finfo);
-}
-
-void au_fi_init_once(void *_finfo)
-{
- struct au_finfo *finfo = _finfo;
- static struct lock_class_key aufs_fi;
-
- au_rw_init(&finfo->fi_rwsem);
- au_rw_class(&finfo->fi_rwsem, &aufs_fi);
-}
-
-int au_finfo_init(struct file *file, struct au_fidir *fidir)
-{
- int err;
- struct au_finfo *finfo;
- struct dentry *dentry;
-
- err = -ENOMEM;
- dentry = file->f_path.dentry;
- finfo = au_cache_alloc_finfo();
- if (unlikely(!finfo))
- goto out;
-
- err = 0;
- au_nfiles_inc(dentry->d_sb);
- /* verbose coding for lock class name */
- if (!fidir)
- au_rw_class(&finfo->fi_rwsem, au_lc_key + AuLcNonDir_FIINFO);
- else
- au_rw_class(&finfo->fi_rwsem, au_lc_key + AuLcDir_FIINFO);
- au_rw_write_lock(&finfo->fi_rwsem);
- finfo->fi_btop = -1;
- finfo->fi_hdir = fidir;
- atomic_set(&finfo->fi_generation, au_digen(dentry));
- /* smp_mb(); */ /* atomic_set */
-
- file->private_data = finfo;
-
-out:
- return err;
-}
diff --git a/fs/aufs/fstype.h b/fs/aufs/fstype.h
deleted file mode 100644
index 725b2ffff..000000000
--- a/fs/aufs/fstype.h
+++ /dev/null
@@ -1,387 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * judging filesystem type
- */
-
-#ifndef __AUFS_FSTYPE_H__
-#define __AUFS_FSTYPE_H__
-
-#ifdef __KERNEL__
-
-#include <linux/fs.h>
-#include <linux/magic.h>
-#include <linux/nfs_fs.h>
-#include <linux/romfs_fs.h>
-
-/* non-zero when @sb carries the aufs superblock magic */
-static inline int au_test_aufs(struct super_block *sb)
-{
-	const int is_aufs = (sb->s_magic == AUFS_SUPER_MAGIC);
-
-	return is_aufs;
-}
-
-/* name of the filesystem type that @sb belongs to */
-static inline const char *au_sbtype(struct super_block *sb)
-{
-	const char *fsname = sb->s_type->name;
-
-	return fsname;
-}
-
-/* non-zero when @sb is iso9660; always 0 when iso9660 is not configured */
-static inline int au_test_iso9660(struct super_block *sb __maybe_unused)
-{
-	int match = 0;
-
-#if defined(CONFIG_ISO9660_FS) || defined(CONFIG_ISO9660_FS_MODULE)
-	match = (sb->s_magic == ISOFS_SUPER_MAGIC);
-#endif
-	return match;
-}
-
-/* non-zero when @sb is romfs; always 0 when romfs is not configured */
-static inline int au_test_romfs(struct super_block *sb __maybe_unused)
-{
-	int match = 0;
-
-#if defined(CONFIG_ROMFS_FS) || defined(CONFIG_ROMFS_FS_MODULE)
-	match = (sb->s_magic == ROMFS_MAGIC);
-#endif
-	return match;
-}
-
-/* non-zero when @sb is cramfs; always 0 when cramfs is not configured */
-static inline int au_test_cramfs(struct super_block *sb __maybe_unused)
-{
-	int match = 0;
-
-#if defined(CONFIG_CRAMFS) || defined(CONFIG_CRAMFS_MODULE)
-	match = (sb->s_magic == CRAMFS_MAGIC);
-#endif
-	return match;
-}
-
-/* non-zero when @sb is NFS; always 0 when NFS is not configured */
-static inline int au_test_nfs(struct super_block *sb __maybe_unused)
-{
-	int match = 0;
-
-#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE)
-	match = (sb->s_magic == NFS_SUPER_MAGIC);
-#endif
-	return match;
-}
-
-/* non-zero when @sb is FUSE; always 0 when FUSE is not configured */
-static inline int au_test_fuse(struct super_block *sb __maybe_unused)
-{
-	int match = 0;
-
-#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE)
-	match = (sb->s_magic == FUSE_SUPER_MAGIC);
-#endif
-	return match;
-}
-
-/* non-zero when @sb is XFS; always 0 when XFS is not configured */
-static inline int au_test_xfs(struct super_block *sb __maybe_unused)
-{
-	int match = 0;
-
-#if defined(CONFIG_XFS_FS) || defined(CONFIG_XFS_FS_MODULE)
-	match = (sb->s_magic == XFS_SB_MAGIC);
-#endif
-	return match;
-}
-
-/* non-zero when @sb is tmpfs; always 0 without CONFIG_TMPFS */
-static inline int au_test_tmpfs(struct super_block *sb __maybe_unused)
-{
-	int match = 0;
-
-#ifdef CONFIG_TMPFS
-	match = (sb->s_magic == TMPFS_MAGIC);
-#endif
-	return match;
-}
-
-/*
- * non-zero when @sb is ecryptfs (identified by its type name, not by a
- * magic number); always 0 when ecryptfs is not configured
- */
-static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused)
-{
-	int match = 0;
-
-#if defined(CONFIG_ECRYPT_FS) || defined(CONFIG_ECRYPT_FS_MODULE)
-	match = (strcmp(au_sbtype(sb), "ecryptfs") == 0);
-#endif
-	return match;
-}
-
-/* non-zero when @sb carries the ramfs magic (tested unconditionally) */
-static inline int au_test_ramfs(struct super_block *sb)
-{
-	const int is_ramfs = (sb->s_magic == RAMFS_MAGIC);
-
-	return is_ramfs;
-}
-
-/* non-zero when @sb is UBIFS; always 0 when UBIFS is not configured */
-static inline int au_test_ubifs(struct super_block *sb __maybe_unused)
-{
-	int match = 0;
-
-#if defined(CONFIG_UBIFS_FS) || defined(CONFIG_UBIFS_FS_MODULE)
-	match = (sb->s_magic == UBIFS_SUPER_MAGIC);
-#endif
-	return match;
-}
-
-/* non-zero when @sb is procfs; always 0 without CONFIG_PROC_FS */
-static inline int au_test_procfs(struct super_block *sb __maybe_unused)
-{
-	int match = 0;
-
-#ifdef CONFIG_PROC_FS
-	match = (sb->s_magic == PROC_SUPER_MAGIC);
-#endif
-	return match;
-}
-
-/* non-zero when @sb is sysfs; always 0 without CONFIG_SYSFS */
-static inline int au_test_sysfs(struct super_block *sb __maybe_unused)
-{
-	int match = 0;
-
-#ifdef CONFIG_SYSFS
-	match = (sb->s_magic == SYSFS_MAGIC);
-#endif
-	return match;
-}
-
-/* non-zero when @sb is configfs; always 0 when configfs is not configured */
-static inline int au_test_configfs(struct super_block *sb __maybe_unused)
-{
-	int match = 0;
-
-#if defined(CONFIG_CONFIGFS_FS) || defined(CONFIG_CONFIGFS_FS_MODULE)
-	match = (sb->s_magic == CONFIGFS_MAGIC);
-#endif
-	return match;
-}
-
-/*
- * non-zero when @sb carries any of the minix magic numbers (v1, v2 or v3);
- * always 0 when minix is not configured
- */
-static inline int au_test_minix(struct super_block *sb __maybe_unused)
-{
-	int match = 0;
-
-#if defined(CONFIG_MINIX_FS) || defined(CONFIG_MINIX_FS_MODULE)
-	switch (sb->s_magic) {
-	case MINIX3_SUPER_MAGIC:
-	case MINIX2_SUPER_MAGIC:
-	case MINIX2_SUPER_MAGIC2:
-	case MINIX_SUPER_MAGIC:
-	case MINIX_SUPER_MAGIC2:
-		match = 1;
-		break;
-	default:
-		break;
-	}
-#endif
-	return match;
-}
-
-/* non-zero when @sb is a FAT filesystem; always 0 when FAT is not configured */
-static inline int au_test_fat(struct super_block *sb __maybe_unused)
-{
-	int match = 0;
-
-#if defined(CONFIG_FAT_FS) || defined(CONFIG_FAT_FS_MODULE)
-	match = (sb->s_magic == MSDOS_SUPER_MAGIC);
-#endif
-	return match;
-}
-
-/* msdos is detected exactly like any other FAT filesystem */
-static inline int au_test_msdos(struct super_block *sb)
-{
-	const int is_fat = au_test_fat(sb);
-
-	return is_fat;
-}
-
-/* vfat is detected exactly like any other FAT filesystem */
-static inline int au_test_vfat(struct super_block *sb)
-{
-	const int is_fat = au_test_fat(sb);
-
-	return is_fat;
-}
-
-/* non-zero when @sb is securityfs; always 0 without CONFIG_SECURITYFS */
-static inline int au_test_securityfs(struct super_block *sb __maybe_unused)
-{
-	int match = 0;
-
-#ifdef CONFIG_SECURITYFS
-	match = (sb->s_magic == SECURITYFS_MAGIC);
-#endif
-	return match;
-}
-
-/* non-zero when @sb is squashfs; always 0 when squashfs is not configured */
-static inline int au_test_squashfs(struct super_block *sb __maybe_unused)
-{
-	int match = 0;
-
-#if defined(CONFIG_SQUASHFS) || defined(CONFIG_SQUASHFS_MODULE)
-	match = (sb->s_magic == SQUASHFS_MAGIC);
-#endif
-	return match;
-}
-
-/* non-zero when @sb is btrfs; always 0 when btrfs is not configured */
-static inline int au_test_btrfs(struct super_block *sb __maybe_unused)
-{
-	int match = 0;
-
-#if defined(CONFIG_BTRFS_FS) || defined(CONFIG_BTRFS_FS_MODULE)
-	match = (sb->s_magic == BTRFS_SUPER_MAGIC);
-#endif
-	return match;
-}
-
-/* non-zero when @sb is xenfs; always 0 when xenfs is not configured */
-static inline int au_test_xenfs(struct super_block *sb __maybe_unused)
-{
-	int match = 0;
-
-#if defined(CONFIG_XENFS) || defined(CONFIG_XENFS_MODULE)
-	match = (sb->s_magic == XENFS_SUPER_MAGIC);
-#endif
-	return match;
-}
-
-/* non-zero when @sb is debugfs; always 0 without CONFIG_DEBUG_FS */
-static inline int au_test_debugfs(struct super_block *sb __maybe_unused)
-{
-	int match = 0;
-
-#ifdef CONFIG_DEBUG_FS
-	match = (sb->s_magic == DEBUGFS_MAGIC);
-#endif
-	return match;
-}
-
-/*
- * non-zero when @sb is nilfs2; always 0 when nilfs2 is not configured.
- * The Kconfig symbol of nilfs2 is NILFS2_FS; testing the non-existent
- * CONFIG_NILFS made this function return 0 unconditionally, so
- * au_test_fs_bad_xino() never rejected nilfs.
- */
-static inline int au_test_nilfs(struct super_block *sb __maybe_unused)
-{
-#if defined(CONFIG_NILFS2_FS) || defined(CONFIG_NILFS2_FS_MODULE)
-	return sb->s_magic == NILFS_SUPER_MAGIC;
-#else
-	return 0;
-#endif
-}
-
-/* non-zero when @sb is hfsplus; always 0 when hfsplus is not configured */
-static inline int au_test_hfsplus(struct super_block *sb __maybe_unused)
-{
-	int match = 0;
-
-#if defined(CONFIG_HFSPLUS_FS) || defined(CONFIG_HFSPLUS_FS_MODULE)
-	match = (sb->s_magic == HFSPLUS_SUPER_MAGIC);
-#endif
-	return match;
-}
-
-/* ---------------------------------------------------------------------- */
-/*
- * Filesystems which cannot be an aufs branch.
- * ramfs is rejected only when CONFIG_AUFS_BR_RAMFS is not set.
- */
-static inline int au_test_fs_unsuppoted(struct super_block *sb)
-{
-#ifndef CONFIG_AUFS_BR_RAMFS
-	if (au_test_ramfs(sb))
-		return 1;
-#endif
-	if (au_test_procfs(sb)
-	    || au_test_sysfs(sb)
-	    || au_test_configfs(sb)
-	    || au_test_debugfs(sb)
-	    || au_test_securityfs(sb)
-	    || au_test_xenfs(sb)
-	    || au_test_ecryptfs(sb))
-		return 1;
-	/* unionfs is deliberately not tested, see au_sbtype() */
-
-	/* aufs on aufs: will be supported in next version */
-	if (au_test_aufs(sb))
-		return 1;
-	return 0;
-}
-
-/*
- * A filesystem is treated as remote when its type does not set
- * FS_REQUIRES_DEV, except for tmpfs (and ramfs when CONFIG_AUFS_BR_RAMFS
- * is set).
- */
-static inline int au_test_fs_remote(struct super_block *sb)
-{
-	if (au_test_tmpfs(sb))
-		return 0;
-#ifdef CONFIG_AUFS_BR_RAMFS
-	if (au_test_ramfs(sb))
-		return 0;
-#endif
-	return !(sb->s_type->fs_flags & FS_REQUIRES_DEV);
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * Note: these functions (below) are created after reading ->getattr() in all
- * filesystems under linux/fs. it means we have to do so in every update...
- */
-
-/*
- * Some filesystems require getattr to refresh the inode attributes before
- * they are referenced.
- * In most cases the inode attributes of NFS (or any remote fs) can be
- * relied on, and the work is left to d_revalidate().
- */
-static inline int au_test_fs_refresh_iattr(struct super_block *sb)
-{
-	if (au_test_nfs(sb))
-		return 1;
-	if (au_test_fuse(sb))
-		return 1;
-	/* btrfs is untested and therefore not listed */
-	return 0;
-}
-
-/*
- * Filesystems which don't maintain i_size or i_blocks.
- */
-static inline int au_test_fs_bad_iattr_size(struct super_block *sb)
-{
-	if (au_test_xfs(sb)
-	    || au_test_btrfs(sb)
-	    || au_test_ubifs(sb))
-		return 1;
-	/* hfsplus: maintained, but incorrect */
-	if (au_test_hfsplus(sb))
-		return 1;
-	/* minix is untested and therefore not listed */
-	return 0;
-}
-
-/*
- * Filesystems which don't store the correct value in some of their inode
- * attributes: everything with a bad size, plus the FAT family.
- */
-static inline int au_test_fs_bad_iattr(struct super_block *sb)
-{
-	if (au_test_fs_bad_iattr_size(sb))
-		return 1;
-	if (au_test_fat(sb) || au_test_msdos(sb) || au_test_vfat(sb))
-		return 1;
-	return 0;
-}
-
-/*
- * Filesystems which don't check i_nlink in link(2).
- * ramfs is listed only when CONFIG_AUFS_BR_RAMFS is set.
- */
-static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
-{
-	if (au_test_tmpfs(sb))
-		return 1;
-#ifdef CONFIG_AUFS_BR_RAMFS
-	if (au_test_ramfs(sb))
-		return 1;
-#endif
-	if (au_test_ubifs(sb) || au_test_hfsplus(sb))
-		return 1;
-	return 0;
-}
-
-/*
- * Filesystems which set S_NOATIME and S_NOCMTIME.
- */
-static inline int au_test_fs_notime(struct super_block *sb)
-{
-	if (au_test_nfs(sb) || au_test_fuse(sb) || au_test_ubifs(sb))
-		return 1;
-	return 0;
-}
-
-/*
- * Temporary support for i#1 in cramfs: inode number 1 on cramfs is
- * reported as not unique, every other inode as unique.
- */
-static inline int au_test_fs_unique_ino(struct inode *inode)
-{
-	return !au_test_cramfs(inode->i_sb) || inode->i_ino != 1;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * The filesystem where the xino files are placed must support i/o after
- * unlink and maintain i_size and i_blocks.  Returns non-zero when @sb is
- * not suitable.
- */
-static inline int au_test_fs_bad_xino(struct super_block *sb)
-{
-	if (au_test_fs_remote(sb) || au_test_fs_bad_iattr_size(sb))
-		return 1;
-	/* don't want unnecessary work for xino */
-	if (au_test_aufs(sb) || au_test_ecryptfs(sb) || au_test_nilfs(sb))
-		return 1;
-	return 0;
-}
-
-/* non-zero when @sb is tmpfs or ramfs */
-static inline int au_test_fs_trunc_xino(struct super_block *sb)
-{
-	if (au_test_tmpfs(sb))
-		return 1;
-	if (au_test_ramfs(sb))
-		return 1;
-	return 0;
-}
-
-/*
- * Test if @sb is real-readonly (squashfs, iso9660, cramfs or romfs).
- */
-static inline int au_test_fs_rr(struct super_block *sb)
-{
-	if (au_test_squashfs(sb) || au_test_iso9660(sb))
-		return 1;
-	if (au_test_cramfs(sb) || au_test_romfs(sb))
-		return 1;
-	return 0;
-}
-
-/*
- * Test if @inode is on NFS mounted with the 'noacl' option.
- * NFS always sets MS_POSIXACL regardless of its mount option 'noacl', so
- * the server capability NFS_CAP_ACLS is tested instead of IS_POSIXACL().
- */
-static inline int au_test_nfs_noacl(struct inode *inode)
-{
-	if (!au_test_nfs(inode->i_sb))
-		return 0;
-	return !nfs_server_capable(inode, NFS_CAP_ACLS);
-}
-
-#endif /* __KERNEL__ */
-#endif /* __AUFS_FSTYPE_H__ */
diff --git a/fs/aufs/hfsnotify.c b/fs/aufs/hfsnotify.c
deleted file mode 100644
index c0a1a63a9..000000000
--- a/fs/aufs/hfsnotify.c
+++ /dev/null
@@ -1,275 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * fsnotify for the lower directories
- */
-
-#include "aufs.h"
-
-/* event mask set on every mark; FS_IN_IGNORED is unnecessary */
-static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE
- | FS_CREATE | FS_EVENT_ON_CHILD);
-/* au_hfsn_fin() sleeps here until au_hfsn_ifree drops to zero */
-static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq);
-/* incremented in au_hfsn_free(), decremented in au_hfsn_free_mark() */
-static __cacheline_aligned_in_smp atomic64_t au_hfsn_ifree = ATOMIC64_INIT(0);
-
-/*
- * Mark destructor handed to fsnotify_init_mark() in au_hfsn_alloc().
- * Frees the au_hnotify which embeds @mark, then decrements au_hfsn_ifree
- * and wakes the waiter on au_hfsn_wq when the counter reaches zero.
- */
-static void au_hfsn_free_mark(struct fsnotify_mark *mark)
-{
- struct au_hnotify *hn = container_of(mark, struct au_hnotify,
- hn_mark);
- AuDbg("here\n");
- au_cache_free_hnotify(hn);
- smp_mb__before_atomic();
- if (atomic64_dec_and_test(&au_hfsn_ifree))
- wake_up(&au_hfsn_wq);
-}
-
-/*
- * Initialize the fsnotify mark embedded in hinode->hi_notify and add it to
- * hinode->hi_inode within the fsnotify group of the branch that
- * hinode->hi_id refers to.  The branch must have br_hfsn set.
- * Returns the result of fsnotify_add_mark().
- */
-static int au_hfsn_alloc(struct au_hinode *hinode)
-{
- int err;
- struct au_hnotify *hn;
- struct super_block *sb;
- struct au_branch *br;
- struct fsnotify_mark *mark;
- aufs_bindex_t bindex;
-
- hn = hinode->hi_notify;
- sb = hn->hn_aufs_inode->i_sb;
- bindex = au_br_index(sb, hinode->hi_id);
- br = au_sbr(sb, bindex);
- AuDebugOn(!br->br_hfsn);
-
- mark = &hn->hn_mark;
- fsnotify_init_mark(mark, au_hfsn_free_mark);
- mark->mask = AuHfsnMask;
- /*
- * by udba rename or rmdir, aufs assigns a new inode to the known
- * h_inode, so specify 1 to allow dups.
- */
- lockdep_off();
- err = fsnotify_add_mark(mark, br->br_hfsn->hfsn_group, hinode->hi_inode,
- /*mnt*/NULL, /*allow_dups*/1);
- /* even if err */
- /* NOTE(review): presumably drops the reference taken by fsnotify_init_mark() -- confirm */
- fsnotify_put_mark(mark);
- lockdep_on();
-
- return err;
-}
-
-/*
- * Destroy the fsnotify mark embedded in @hn.
- * au_hfsn_ifree is incremented first; au_hfsn_free_mark() decrements it
- * when it frees @hn.  Always returns 0, and au_hn_free() frees @hn itself
- * only on a non-zero return.
- */
-static int au_hfsn_free(struct au_hinode *hinode, struct au_hnotify *hn)
-{
- struct fsnotify_mark *mark;
- unsigned long long ull;
- struct fsnotify_group *group;
-
- ull = atomic64_inc_return(&au_hfsn_ifree);
- BUG_ON(!ull);
-
- mark = &hn->hn_mark;
- /* read mark->group and take a group reference under mark->lock */
- spin_lock(&mark->lock);
- group = mark->group;
- fsnotify_get_group(group);
- spin_unlock(&mark->lock);
- lockdep_off();
- fsnotify_destroy_mark(mark, group);
- fsnotify_put_group(group);
- lockdep_on();
-
- /* free hn by myself */
- return 0;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * Switch the event mask of the mark of @hinode on (@do_set != 0) or off,
- * under mark->lock.  The mask is expected to be in the opposite state
- * beforehand (checked by AuDebugOn).
- */
-static void au_hfsn_ctl(struct au_hinode *hinode, int do_set)
-{
-	struct fsnotify_mark *mark = &hinode->hi_notify->hn_mark;
-
-	spin_lock(&mark->lock);
-	if (!do_set) {
-		AuDebugOn(!(mark->mask & AuHfsnMask));
-		mark->mask &= ~AuHfsnMask;
-	} else {
-		AuDebugOn(mark->mask & AuHfsnMask);
-		mark->mask |= AuHfsnMask;
-	}
-	spin_unlock(&mark->lock);
-	/* fsnotify_recalc_inode_mask(hinode->hi_inode); */
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* uncomment to compile the verbose event debugging below */
-/* #define AuDbgHnotify */
-#ifdef AuDbgHnotify
-/*
- * Debug helper: return the name of the first FS_* flag (in the order tested
- * below) that is set in @mask, "" when none matches, or "??" when
- * CONFIG_AUFS_DEBUG is not set.
- */
-static char *au_hfsn_name(u32 mask)
-{
-#ifdef CONFIG_AUFS_DEBUG
-#define test_ret(flag) \
- do { \
- if (mask & flag) \
- return #flag; \
- } while (0)
- test_ret(FS_ACCESS);
- test_ret(FS_MODIFY);
- test_ret(FS_ATTRIB);
- test_ret(FS_CLOSE_WRITE);
- test_ret(FS_CLOSE_NOWRITE);
- test_ret(FS_OPEN);
- test_ret(FS_MOVED_FROM);
- test_ret(FS_MOVED_TO);
- test_ret(FS_CREATE);
- test_ret(FS_DELETE);
- test_ret(FS_DELETE_SELF);
- test_ret(FS_MOVE_SELF);
- test_ret(FS_UNMOUNT);
- test_ret(FS_Q_OVERFLOW);
- test_ret(FS_IN_IGNORED);
- test_ret(FS_ISDIR);
- test_ret(FS_IN_ONESHOT);
- test_ret(FS_EVENT_ON_CHILD);
- return "";
-#undef test_ret
-#else
- return "??";
-#endif
-}
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * ->free_group_priv() of au_hfsn_ops: free the au_br_hfsnotify stored in
- * group->private by au_hfsn_init_br().
- */
-static void au_hfsn_free_group(struct fsnotify_group *group)
-{
-	struct au_br_hfsnotify *hfsn;
-
-	hfsn = group->private;
-	AuDbg("here\n");
-	kfree(hfsn);
-}
-
-/*
- * ->handle_event() of au_hfsn_ops.
- * FS_IN_IGNORED and FS_UNMOUNT are dropped; every other event is passed
- * to au_hnotify() with @inode as the lower directory, the au_hnotify
- * embedding @inode_mark, and @file_name as the child name.  The child inode
- * is always passed as NULL.
- * Returns 0 for a dropped event, otherwise the result of au_hnotify().
- */
-static int au_hfsn_handle_event(struct fsnotify_group *group,
-				struct inode *inode,
-				struct fsnotify_mark *inode_mark,
-				struct fsnotify_mark *vfsmount_mark,
-				u32 mask, void *data, int data_type,
-				const unsigned char *file_name, u32 cookie)
-{
-	int err;
-	struct au_hnotify *hnotify;
-	struct inode *h_dir, *h_inode;
-	/*
-	 * The name is measured before the mask is examined, so a nameless
-	 * event must not reach strlen(); au_hnotify() copes with a NULL name.
-	 */
-	struct qstr h_child_qstr = QSTR_INIT(file_name,
-					     file_name ? strlen(file_name) : 0);
-
-	AuDebugOn(data_type != FSNOTIFY_EVENT_INODE);
-
-	err = 0;
-	/* if FS_UNMOUNT happens, there must be another bug */
-	AuDebugOn(mask & FS_UNMOUNT);
-	if (mask & (FS_IN_IGNORED | FS_UNMOUNT))
-		goto out;
-
-	h_dir = inode;
-	h_inode = NULL;
-#ifdef AuDbgHnotify
-	au_debug_on();
-	if (1 || h_child_qstr.len != sizeof(AUFS_XINO_FNAME) - 1
-	    || strncmp(h_child_qstr.name, AUFS_XINO_FNAME, h_child_qstr.len)) {
-		AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n",
-		      h_dir->i_ino, mask, au_hfsn_name(mask),
-		      AuLNPair(&h_child_qstr), h_inode ? h_inode->i_ino : 0);
-		/* WARN_ON(1); */
-	}
-	au_debug_off();
-#endif
-
-	AuDebugOn(!inode_mark);
-	hnotify = container_of(inode_mark, struct au_hnotify, hn_mark);
-	err = au_hnotify(h_dir, hnotify, mask, &h_child_qstr, h_inode);
-
-out:
-	return err;
-}
-
-/* fsnotify callbacks of the groups allocated in au_hfsn_init_br() */
-static struct fsnotify_ops au_hfsn_ops = {
- .handle_event = au_hfsn_handle_event,
- .free_group_priv = au_hfsn_free_group
-};
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * Drop the reference to the fsnotify group of @br, if the branch has one.
- * br->br_hfsn itself is not modified here.
- */
-static void au_hfsn_fin_br(struct au_branch *br)
-{
-	struct au_br_hfsnotify *hfsn = br->br_hfsn;
-
-	if (!hfsn)
-		return;
-
-	lockdep_off();
-	fsnotify_put_group(hfsn->hfsn_group);
-	lockdep_on();
-}
-
-/*
- * Set up fsnotify for the branch @br.
- * br->br_hfsn is reset to NULL first; when @perm is hnotifyable, an
- * au_br_hfsnotify and an fsnotify group are allocated and linked to each
- * other and to the branch.
- * Returns 0 on success (including the not-hnotifyable case), -ENOMEM, or
- * the error of fsnotify_alloc_group().
- */
-static int au_hfsn_init_br(struct au_branch *br, int perm)
-{
-	int err;
-	struct fsnotify_group *group;
-	struct au_br_hfsnotify *hfsn;
-
-	br->br_hfsn = NULL;
-	if (!au_br_hnotifyable(perm))
-		return 0;
-
-	hfsn = kmalloc(sizeof(*hfsn), GFP_NOFS);
-	if (unlikely(!hfsn))
-		return -ENOMEM;
-
-	group = fsnotify_alloc_group(&au_hfsn_ops);
-	if (IS_ERR(group)) {
-		err = PTR_ERR(group);
-		pr_err("fsnotify_alloc_group() failed, %d\n", err);
-		kfree(hfsn);
-		return err;
-	}
-
-	group->private = hfsn;
-	hfsn->hfsn_group = group;
-	br->br_hfsn = hfsn;
-	return 0;
-}
-
-/*
- * Make sure @br has its fsnotify data: nothing is done when br->br_hfsn is
- * already set, otherwise au_hfsn_init_br() is called.  @udba is unused.
- */
-static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm)
-{
-	if (br->br_hfsn)
-		return 0;
-
-	return au_hfsn_init_br(br, perm);
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * Wait until au_hfsn_ifree is zero, i.e. until au_hfsn_free_mark() has run
- * for every mark that au_hfsn_free() started to destroy.
- */
-static void au_hfsn_fin(void)
-{
- AuDbg("au_hfsn_ifree %lld\n", (long long)atomic64_read(&au_hfsn_ifree));
- wait_event(au_hfsn_wq, !atomic64_read(&au_hfsn_ifree));
-}
-
-/*
- * fsnotify implementation of the hnotify operations.
- * No .init member is set here; au_hnotify_init() tests it before calling.
- */
-const struct au_hnotify_op au_hnotify_op = {
- .ctl = au_hfsn_ctl,
- .alloc = au_hfsn_alloc,
- .free = au_hfsn_free,
-
- .fin = au_hfsn_fin,
-
- .reset_br = au_hfsn_reset_br,
- .fin_br = au_hfsn_fin_br,
- .init_br = au_hfsn_init_br
-};
diff --git a/fs/aufs/hfsplus.c b/fs/aufs/hfsplus.c
deleted file mode 100644
index 145c6ac2f..000000000
--- a/fs/aufs/hfsplus.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (C) 2010-2016 Junjiro R. Okajima
- */
-
-/*
- * special support for filesystems which acquire an inode mutex
- * at the final closing of a file, eg, hfsplus.
- *
- * This trick is very simple and stupid, just to open the file before the
- * really necessary open to tell hfsplus that this is not the final closing.
- * The caller should call au_h_open_pre() after acquiring the inode mutex,
- * and au_h_open_post() after releasing it.
- */
-
-#include "aufs.h"
-
-/*
- * Open the lower file of @dentry on branch @bindex in advance, but only
- * when the lower dentry is a regular file on hfsplus.
- * Returns NULL when no open is needed, otherwise whatever au_h_open()
- * returned; the result is to be handed to au_h_open_post().
- */
-struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
-			   int force_wr)
-{
-	struct dentry *h_dentry;
-
-	h_dentry = au_h_dptr(dentry, bindex);
-	AuDebugOn(!h_dentry);
-	AuDebugOn(d_is_negative(h_dentry));
-
-	if (!au_test_hfsplus(h_dentry->d_sb) || !d_is_reg(h_dentry))
-		return NULL;
-
-	return au_h_open(dentry, bindex,
-			 O_RDONLY | O_NOATIME | O_LARGEFILE,
-			 /*file*/NULL, force_wr);
-}
-
-/*
- * Counterpart of au_h_open_pre(): when @h_file is not NULL, release it and
- * put the branch @bindex of the superblock of @dentry.
- */
-void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
-		    struct file *h_file)
-{
-	if (!h_file)
-		return;
-
-	fput(h_file);
-	au_sbr_put(dentry->d_sb, bindex);
-}
diff --git a/fs/aufs/hnotify.c b/fs/aufs/hnotify.c
deleted file mode 100644
index 3e0a4f67d..000000000
--- a/fs/aufs/hnotify.c
+++ /dev/null
@@ -1,697 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * abstraction to notify the direct changes on lower directories
- */
-
-#include "aufs.h"
-
-/*
- * Allocate an au_hnotify for @hinode, bind it to the aufs @inode and hand
- * it to the alloc operation of au_hnotify_op.
- * Returns 0 on success, -ENOMEM when the cache allocation fails, or the
- * error of the alloc operation; -EEXIST from the operation is turned into 0
- * with hinode->hi_notify left NULL.
- */
-int au_hn_alloc(struct au_hinode *hinode, struct inode *inode)
-{
-	int err;
-	struct au_hnotify *hn;
-
-	err = -ENOMEM;
-	hn = au_cache_alloc_hnotify();
-	if (!hn)
-		goto out;
-
-	hn->hn_aufs_inode = inode;
-	hinode->hi_notify = hn;
-	err = au_hnotify_op.alloc(hinode);
-	AuTraceErr(err);
-	if (likely(!err))
-		goto out;
-
-	hinode->hi_notify = NULL;
-	au_cache_free_hnotify(hn);
-	/*
-	 * The upper dir was removed by udba, but a dir with the same name
-	 * is left. In this case, aufs assigns a new inode number and sets
-	 * the monitor again.
-	 * For the lower dir, the old monitor is still left.
-	 */
-	if (err == -EEXIST)
-		err = 0;
-
-out:
-	AuTraceErr(err);
-	return err;
-}
-
-/*
- * Detach the au_hnotify from @hinode, if any, and pass it to the free
- * operation.  The object is returned to the cache here only when the
- * operation returns non-zero.
- */
-void au_hn_free(struct au_hinode *hinode)
-{
-	struct au_hnotify *hn = hinode->hi_notify;
-
-	if (!hn)
-		return;
-
-	hinode->hi_notify = NULL;
-	if (au_hnotify_op.free(hinode, hn))
-		au_cache_free_hnotify(hn);
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* forward to the ctl operation, but only when @hinode has an au_hnotify */
-void au_hn_ctl(struct au_hinode *hinode, int do_set)
-{
-	if (!hinode->hi_notify)
-		return;
-
-	au_hnotify_op.ctl(hinode, do_set);
-}
-
-/*
- * For every lower inode of @inode (from au_ibstart() to au_ibend()), clear
- * the lower inode pointer and set the same inode again with @flags minus
- * AuHi_XINO.
- */
-void au_hn_reset(struct inode *inode, unsigned int flags)
-{
- aufs_bindex_t bindex, bend;
- struct inode *hi;
- struct dentry *iwhdentry;
-
- bend = au_ibend(inode);
- for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
- hi = au_h_iptr(inode, bindex);
- if (!hi)
- continue;
-
- /* mutex_lock_nested(&hi->i_mutex, AuLsc_I_CHILD); */
- iwhdentry = au_hi_wh(inode, bindex);
- if (iwhdentry)
- dget(iwhdentry);
- /* extra references on hi and iwhdentry are held across the reset and dropped below */
- au_igrab(hi);
- au_set_h_iptr(inode, bindex, NULL, 0);
- au_set_h_iptr(inode, bindex, au_igrab(hi),
- flags & ~AuHi_XINO);
- iput(hi);
- dput(iwhdentry);
- /* mutex_unlock(&hi->i_mutex); */
- }
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * When @h_inode is one of the lower inodes of @inode, write ino 0 to the
- * xino of every branch where @inode has a lower inode.
- * Nothing is done for the aufs root inode (a warning is printed) or when
- * @h_inode is not found.
- * Returns 0 in those cases, otherwise the result of the last
- * au_xino_write(); callers in this file ignore it.
- */
-static int hn_xino(struct inode *inode, struct inode *h_inode)
-{
- int err;
- aufs_bindex_t bindex, bend, bfound, bstart;
- struct inode *h_i;
-
- err = 0;
- if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
- pr_warn("branch root dir was changed\n");
- goto out;
- }
-
- bfound = -1;
- bend = au_ibend(inode);
- bstart = au_ibstart(inode);
-#if 0 /* reserved for future use */
- if (bindex == bend) {
- /* keep this ino in rename case */
- goto out;
- }
-#endif
- /* look for the branch where h_inode lives */
- for (bindex = bstart; bindex <= bend; bindex++)
- if (au_h_iptr(inode, bindex) == h_inode) {
- bfound = bindex;
- break;
- }
- if (bfound < 0)
- goto out;
-
- for (bindex = bstart; bindex <= bend; bindex++) {
- h_i = au_h_iptr(inode, bindex);
- if (!h_i)
- continue;
-
- err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0);
- /* ignore this error */
- /* bad action? */
- }
-
- /* children inode number will be broken */
-
-out:
- AuTraceErr(err);
- return err;
-}
-
-/*
- * Decrement the generation of every non-root dentry that au_dcsub_pages()
- * collects for @dentry, and of its inode when the dentry is positive.
- * Returns 0, or the error of au_dpages_init() / au_dcsub_pages().
- */
-static int hn_gen_tree(struct dentry *dentry)
-{
- int err, i, j, ndentry;
- struct au_dcsub_pages dpages;
- struct au_dpage *dpage;
- struct dentry **dentries;
-
- err = au_dpages_init(&dpages, GFP_NOFS);
- if (unlikely(err))
- goto out;
- err = au_dcsub_pages(&dpages, dentry, NULL, NULL);
- if (unlikely(err))
- goto out_dpages;
-
- for (i = 0; i < dpages.ndpage; i++) {
- dpage = dpages.dpages + i;
- dentries = dpage->dentries;
- ndentry = dpage->ndentry;
- for (j = 0; j < ndentry; j++) {
- struct dentry *d;
-
- d = dentries[j];
- if (IS_ROOT(d))
- continue;
-
- au_digen_dec(d);
- if (d_really_is_positive(d))
- /* todo: reset children xino?
- cached children only? */
- au_iigen_dec(d_inode(d));
- }
- }
-
-out_dpages:
- au_dpages_free(&dpages);
-
-#if 0
- /* discard children */
- dentry_unhash(dentry);
- dput(dentry);
-#endif
-out:
- return err;
-}
-
-/*
- * Make the generation of the aufs @inode obsolete, together with the
- * generation of its dentry whose name is exactly @name (@nlen bytes).
- * - non-dir: the inode generation is always decremented; the dentry
- *   generation only for the first alias with a matching name.
- * - dir: FAILED_REFRESH_DIR is set on the superblock info; when an alias
- *   exists and its name matches, the whole tree below it is handled by
- *   hn_gen_tree(); without any alias only the inode generation is
- *   decremented.
- * The aufs root inode is never touched (a warning is printed).
- *
- * return 0 if processed, 1 if no matching dentry was found, or the error of
- * hn_gen_tree().
- */
-static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode,
-			   const unsigned int isdir)
-{
-	int err;
-	struct dentry *d;
-	struct qstr *dname;
-
-	err = 1;
-	if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
-		pr_warn("branch root dir was changed\n");
-		err = 0;
-		goto out;
-	}
-
-	if (!isdir) {
-		AuDebugOn(!name);
-		au_iigen_dec(inode);
-		spin_lock(&inode->i_lock);
-		hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
-			spin_lock(&d->d_lock);
-			dname = &d->d_name;
-			/*
-			 * skip the alias unless both the length and the bytes
-			 * match; '&&' here accepted differently named aliases
-			 */
-			if (dname->len != nlen
-			    || memcmp(dname->name, name, nlen)) {
-				spin_unlock(&d->d_lock);
-				continue;
-			}
-			err = 0;
-			au_digen_dec(d);
-			spin_unlock(&d->d_lock);
-			break;
-		}
-		spin_unlock(&inode->i_lock);
-	} else {
-		au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR);
-		d = d_find_any_alias(inode);
-		if (!d) {
-			au_iigen_dec(inode);
-			goto out;
-		}
-
-		spin_lock(&d->d_lock);
-		dname = &d->d_name;
-		if (dname->len == nlen && !memcmp(dname->name, name, nlen)) {
-			/* hn_gen_tree() is called without d_lock held */
-			spin_unlock(&d->d_lock);
-			err = hn_gen_tree(d);
-			spin_lock(&d->d_lock);
-		}
-		spin_unlock(&d->d_lock);
-		dput(d);
-	}
-
-out:
-	AuTraceErr(err);
-	return err;
-}
-
-/*
- * Make the generation of @dentry obsolete.
- * - non-dir: decrement the generation of the dentry and, when it is
- *   positive, of its inode.
- * - dir: set FAILED_REFRESH_DIR on the superblock info and, when the dentry
- *   is positive, process the whole tree by hn_gen_tree().
- * A root dentry is never touched (a warning is printed).
- * Returns 0 or the error of hn_gen_tree().
- */
-static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir)
-{
-	int err;
-
-	if (IS_ROOT(dentry)) {
-		pr_warn("branch root dir was changed\n");
-		return 0;
-	}
-
-	err = 0;
-	if (isdir) {
-		au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR);
-		if (d_really_is_positive(dentry))
-			err = hn_gen_tree(dentry);
-	} else {
-		au_digen_dec(dentry);
-		if (d_really_is_positive(dentry))
-			au_iigen_dec(d_inode(dentry));
-	}
-
-	AuTraceErr(err);
-	return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* hnotify job flags */
-#define AuHnJob_XINO0 1
-#define AuHnJob_GEN (1 << 1)
-#define AuHnJob_DIRENT (1 << 2)
-#define AuHnJob_ISDIR (1 << 3)
-#define AuHnJob_TRYXINO0 (1 << 4)
-#define AuHnJob_MNTPNT (1 << 5)
-/* test, set and clear an AuHnJob_<name> bit in @flags */
-#define au_ftest_hnjob(flags, name) ((flags) & AuHnJob_##name)
-#define au_fset_hnjob(flags, name) \
- do { (flags) |= AuHnJob_##name; } while (0)
-#define au_fclr_hnjob(flags, name) \
- do { (flags) &= ~AuHnJob_##name; } while (0)
-
-/* index into the flags[] arrays: job for the child and for the parent dir */
-enum {
- AuHn_CHILD,
- AuHn_PARENT,
- AuHnLast
-};
-
-/*
- * one queued event, allocated in au_hnotify() and consumed and freed by
- * au_hn_bh(); the child name is stored right behind the struct
- */
-struct au_hnotify_args {
- struct inode *h_dir, *dir, *h_child_inode;
- u32 mask;
- unsigned int flags[AuHnLast];
- unsigned int h_child_nlen;
- char h_child_name[];
-};
-
-/* arguments of a single hn_job() call, filled in by au_hn_bh() */
-struct hn_job_args {
- unsigned int flags;
- struct inode *inode, *h_inode, *dir, *h_dir;
- struct dentry *dentry;
- char *h_name;
- int h_nlen;
-};
-
-/*
- * Execute the jobs selected by a->flags on a->inode / a->dentry:
- * XINO0 and TRYXINO0 reset the xino, GEN makes the generation obsolete,
- * DIRENT invalidates the cached dir entries and MNTPNT warns about a
- * removed or renamed mount-point.  Errors of the individual jobs are
- * ignored; always returns 0.
- */
-static int hn_job(struct hn_job_args *a)
-{
- const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR);
- int e;
-
- /* reset xino */
- if (au_ftest_hnjob(a->flags, XINO0) && a->inode)
- hn_xino(a->inode, a->h_inode); /* ignore this error */
-
- /* TRYXINO0: reset xino only if the lower inode has no link and is not I_LINKABLE */
- if (au_ftest_hnjob(a->flags, TRYXINO0)
- && a->inode
- && a->h_inode) {
- mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
- if (!a->h_inode->i_nlink
- && !(a->h_inode->i_state & I_LINKABLE))
- hn_xino(a->inode, a->h_inode); /* ignore this error */
- mutex_unlock(&a->h_inode->i_mutex);
- }
-
- /* make the generation obsolete */
- if (au_ftest_hnjob(a->flags, GEN)) {
- e = -1;
- if (a->inode)
- e = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode,
- isdir);
- /* fall back to the dentry when the inode way did not process it */
- if (e && a->dentry)
- hn_gen_by_name(a->dentry, isdir);
- /* ignore this error */
- }
-
- /* make dir entries obsolete */
- if (au_ftest_hnjob(a->flags, DIRENT) && a->inode) {
- struct au_vdir *vdir;
-
- vdir = au_ivdir(a->inode);
- if (vdir)
- vdir->vd_jiffy = 0;
- /* IMustLock(a->inode); */
- /* a->inode->i_version++; */
- }
-
- /* can do nothing but warn */
- if (au_ftest_hnjob(a->flags, MNTPNT)
- && a->dentry
- && d_mountpoint(a->dentry))
- pr_warn("mount-point %pd is removed or renamed\n", a->dentry);
-
- return 0;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * Search the children of an alias of @dir for a dentry named @name
- * (@nlen bytes).  Every matching child that has aufs dentry info gets its
- * generation decremented; the first such child with au_dcount() > 0 is
- * returned with a reference taken and write-locked by
- * di_write_lock_child().
- * Returns NULL when @dir has no alias or no such child exists.
- */
-static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen,
- struct inode *dir)
-{
- struct dentry *dentry, *d, *parent;
- struct qstr *dname;
-
- parent = d_find_any_alias(dir);
- if (!parent)
- return NULL;
-
- dentry = NULL;
- spin_lock(&parent->d_lock);
- list_for_each_entry(d, &parent->d_subdirs, d_child) {
- /* AuDbg("%pd\n", d); */
- spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
- dname = &d->d_name;
- if (dname->len != nlen || memcmp(dname->name, name, nlen))
- goto cont_unlock;
- if (au_di(d))
- au_digen_dec(d);
- else
- goto cont_unlock;
- if (au_dcount(d) > 0) {
- dentry = dget_dlock(d);
- spin_unlock(&d->d_lock);
- break;
- }
-
-cont_unlock:
- spin_unlock(&d->d_lock);
- }
- spin_unlock(&parent->d_lock);
- dput(parent);
-
- /* the aufs dentry lock is taken only after both d_locks are released */
- if (dentry)
- di_write_lock_child(dentry);
-
- return dentry;
-}
-
-/*
- * Translate the lower inode number @h_ino on branch @bindex to the aufs
- * inode number via xino and look the aufs inode up by ilookup().
- * Returns the inode with a reference held and write-locked by
- * ii_write_lock_child(), or NULL when the xino read fails, no number is
- * recorded, the inode is not found by ilookup(), or it is the aufs root
- * inode.
- */
-static struct inode *lookup_wlock_by_ino(struct super_block *sb,
-					 aufs_bindex_t bindex, ino_t h_ino)
-{
-	struct inode *inode;
-	ino_t ino;
-	int err;
-
-	err = au_xino_read(sb, bindex, h_ino, &ino);
-	if (err || !ino)
-		return NULL;
-
-	inode = ilookup(sb, ino);
-	if (!inode)
-		return NULL;
-
-	if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
-		pr_warn("wrong root branch\n");
-		iput(inode);
-		return NULL;
-	}
-
-	ii_write_lock_child(inode);
-	return inode;
-}
-
-/*
- * Deferred part of an hnotify event, queued by au_hnotify() through
- * au_wkq_nowait().  @_args is the au_hnotify_args built there.
- * Under the write-locked superblock info it finds the branch of a->h_dir,
- * looks up the affected child by name and/or by inode number, runs
- * hn_job() for the child and then for the parent dir, and finally drops
- * the inode references held in the args and frees them.
- */
-static void au_hn_bh(void *_args)
-{
- struct au_hnotify_args *a = _args;
- struct super_block *sb;
- aufs_bindex_t bindex, bend, bfound;
- unsigned char xino, try_iput;
- int err;
- struct inode *inode;
- ino_t h_ino;
- struct hn_job_args args;
- struct dentry *dentry;
- struct au_sbinfo *sbinfo;
-
- AuDebugOn(!_args);
- AuDebugOn(!a->h_dir);
- AuDebugOn(!a->dir);
- AuDebugOn(!a->mask);
- AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n",
- a->mask, a->dir->i_ino, a->h_dir->i_ino,
- a->h_child_inode ? a->h_child_inode->i_ino : 0);
-
- inode = NULL;
- dentry = NULL;
- /*
- * do not lock a->dir->i_mutex here
- * because of d_revalidate() may cause a deadlock.
- */
- sb = a->dir->i_sb;
- AuDebugOn(!sb);
- sbinfo = au_sbi(sb);
- AuDebugOn(!sbinfo);
- si_write_lock(sb, AuLock_NOPLMW);
-
- /* find the branch index where a->h_dir is the lower dir of a->dir */
- ii_read_lock_parent(a->dir);
- bfound = -1;
- bend = au_ibend(a->dir);
- for (bindex = au_ibstart(a->dir); bindex <= bend; bindex++)
- if (au_h_iptr(a->dir, bindex) == a->h_dir) {
- bfound = bindex;
- break;
- }
- ii_read_unlock(a->dir);
- if (unlikely(bfound < 0))
- goto out;
-
- xino = !!au_opt_test(au_mntflags(sb), XINO);
- h_ino = 0;
- if (a->h_child_inode)
- h_ino = a->h_child_inode->i_ino;
-
- /* first try to find the child by its name */
- if (a->h_child_nlen
- && (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN)
- || au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT)))
- dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen,
- a->dir);
- try_iput = 0;
- if (dentry && d_really_is_positive(dentry))
- inode = d_inode(dentry);
- /* otherwise by the lower inode number; that inode is unlocked and put below */
- if (xino && !inode && h_ino
- && (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0)
- || au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0)
- || au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) {
- inode = lookup_wlock_by_ino(sb, bfound, h_ino);
- try_iput = 1;
- }
-
- /* the job for the child */
- args.flags = a->flags[AuHn_CHILD];
- args.dentry = dentry;
- args.inode = inode;
- args.h_inode = a->h_child_inode;
- args.dir = a->dir;
- args.h_dir = a->h_dir;
- args.h_name = a->h_child_name;
- args.h_nlen = a->h_child_nlen;
- err = hn_job(&args);
- if (dentry) {
- if (au_di(dentry))
- di_write_unlock(dentry);
- dput(dentry);
- }
- if (inode && try_iput) {
- ii_write_unlock(inode);
- iput(inode);
- }
-
- /* the job for the parent dir */
- ii_write_lock_parent(a->dir);
- args.flags = a->flags[AuHn_PARENT];
- args.dentry = NULL;
- args.inode = a->dir;
- args.h_inode = a->h_dir;
- args.dir = NULL;
- args.h_dir = NULL;
- args.h_name = NULL;
- args.h_nlen = 0;
- err = hn_job(&args);
- ii_write_unlock(a->dir);
-
-out:
- /* drop the references taken in au_hnotify() and free the args */
- iput(a->h_child_inode);
- iput(a->h_dir);
- iput(a->dir);
- si_write_unlock(sb);
- au_nwt_done(&sbinfo->si_nowait);
- kfree(a);
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * Entry point for an event on the lower dir @h_dir which is monitored by
- * @hnotify.  Decides the jobs for the child and the parent from @mask,
- * copies the child name (without the whiteout prefix, if any) into a newly
- * allocated au_hnotify_args and queues au_hn_bh() by au_wkq_nowait().
- * For a whiteout name the child inode is not passed on.
- * Returns 0 on success or when the aufs inode cannot be grabbed, -ENOMEM,
- * or the error of au_wkq_nowait().
- */
-int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
- struct qstr *h_child_qstr, struct inode *h_child_inode)
-{
- int err, len;
- unsigned int flags[AuHnLast], f;
- unsigned char isdir, isroot, wh;
- struct inode *dir;
- struct au_hnotify_args *args;
- char *p, *h_child_name;
-
- err = 0;
- AuDebugOn(!hnotify || !hnotify->hn_aufs_inode);
- dir = igrab(hnotify->hn_aufs_inode);
- if (!dir)
- goto out;
-
- isroot = (dir->i_ino == AUFS_ROOT_INO);
- wh = 0;
- h_child_name = (void *)h_child_qstr->name;
- len = h_child_qstr->len;
- if (h_child_name) {
- /* strip the whiteout prefix and remember that it was there */
- if (len > AUFS_WH_PFX_LEN
- && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
- h_child_name += AUFS_WH_PFX_LEN;
- len -= AUFS_WH_PFX_LEN;
- wh = 1;
- }
- }
-
- isdir = 0;
- if (h_child_inode)
- isdir = !!S_ISDIR(h_child_inode->i_mode);
- flags[AuHn_PARENT] = AuHnJob_ISDIR;
- flags[AuHn_CHILD] = 0;
- if (isdir)
- flags[AuHn_CHILD] = AuHnJob_ISDIR;
- au_fset_hnjob(flags[AuHn_PARENT], DIRENT);
- au_fset_hnjob(flags[AuHn_CHILD], GEN);
- switch (mask & FS_EVENTS_POSS_ON_CHILD) {
- case FS_MOVED_FROM:
- case FS_MOVED_TO:
- au_fset_hnjob(flags[AuHn_CHILD], XINO0);
- au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
- /*FALLTHROUGH*/
- case FS_CREATE:
- AuDebugOn(!h_child_name);
- break;
-
- case FS_DELETE:
- /*
- * aufs never be able to get this child inode.
- * revalidation should be in d_revalidate()
- * by checking i_nlink, i_generation or d_unhashed().
- */
- AuDebugOn(!h_child_name);
- au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0);
- au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
- break;
-
- default:
- AuDebugOn(1);
- }
-
- if (wh)
- h_child_inode = NULL;
-
- err = -ENOMEM;
- /* iput() and kfree() will be called in au_hn_bh() */
- args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS);
- if (unlikely(!args)) {
- AuErr1("no memory\n");
- iput(dir);
- goto out;
- }
- args->flags[AuHn_PARENT] = flags[AuHn_PARENT];
- args->flags[AuHn_CHILD] = flags[AuHn_CHILD];
- args->mask = mask;
- args->dir = dir;
- args->h_dir = igrab(h_dir);
- if (h_child_inode)
- h_child_inode = igrab(h_child_inode); /* can be NULL */
- args->h_child_inode = h_child_inode;
- args->h_child_nlen = len;
- if (len) {
- /* the NUL-terminated name is stored right behind the struct */
- p = (void *)args;
- p += sizeof(*args);
- memcpy(p, h_child_name, len);
- p[len] = 0;
- }
-
- /* NFS fires the event for silly-renamed one from kworker */
- f = 0;
- if (!dir->i_nlink
- || (au_test_nfs(h_dir->i_sb) && (mask & FS_DELETE)))
- f = AuWkq_NEST;
- err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f);
- if (unlikely(err)) {
- /* au_hn_bh() will not run, so undo everything here */
- pr_err("wkq %d\n", err);
- iput(args->h_child_inode);
- iput(args->h_dir);
- iput(args->dir);
- kfree(args);
- }
-
-out:
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * Call the reset_br operation for @br when the backend provides one.
- * @udba must contain a bit of AuOptMask_UDBA (checked by AuDebugOn).
- * Returns 0 without the operation, otherwise its result.
- */
-int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm)
-{
-	AuDebugOn(!(udba & AuOptMask_UDBA));
-
-	if (!au_hnotify_op.reset_br)
-		return 0;
-
-	return au_hnotify_op.reset_br(udba, br, perm);
-}
-
-/*
- * Call the init_br operation for @br when the backend provides one.
- * Returns 0 without the operation, otherwise its result.
- */
-int au_hnotify_init_br(struct au_branch *br, int perm)
-{
-	if (!au_hnotify_op.init_br)
-		return 0;
-
-	return au_hnotify_op.init_br(br, perm);
-}
-
-/* call the fin_br operation for @br when the backend provides one */
-void au_hnotify_fin_br(struct au_branch *br)
-{
-	if (!au_hnotify_op.fin_br)
-		return;
-
-	au_hnotify_op.fin_br(br);
-}
-
-/* destroy the hnotify cache and clear its slot in au_cachep[] */
-static void au_hn_destroy_cache(void)
-{
- kmem_cache_destroy(au_cachep[AuCache_HNOTIFY]);
- au_cachep[AuCache_HNOTIFY] = NULL;
-}
-
-/*
- * Create the hnotify cache and run the init operation of the backend when
- * it provides one.  The cache is destroyed again when that operation fails.
- * Returns 0, -ENOMEM when the cache cannot be created, or the error of the
- * init operation.
- */
-int __init au_hnotify_init(void)
-{
-	int err;
-
-	err = -ENOMEM;
-	au_cachep[AuCache_HNOTIFY] = AuCache(au_hnotify);
-	if (!au_cachep[AuCache_HNOTIFY])
-		goto out;
-
-	err = 0;
-	if (au_hnotify_op.init)
-		err = au_hnotify_op.init();
-	if (unlikely(err))
-		au_hn_destroy_cache();
-
-out:
-	AuTraceErr(err);
-	return err;
-}
-
-/*
- * Run the fin operation of the backend when it provides one, then destroy
- * the hnotify cache if it still exists.
- */
-void au_hnotify_fin(void)
-{
- if (au_hnotify_op.fin)
- au_hnotify_op.fin();
- /* cf. au_cache_fin() */
- if (au_cachep[AuCache_HNOTIFY])
- au_hn_destroy_cache();
-}
diff --git a/fs/aufs/i_op.c b/fs/aufs/i_op.c
deleted file mode 100644
index 6e50526d8..000000000
--- a/fs/aufs/i_op.c
+++ /dev/null
@@ -1,1477 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * inode operations (except add/del/rename)
- */
-
-#include <linux/device_cgroup.h>
-#include <linux/fs_stack.h>
-#include <linux/namei.h>
-#include <linux/security.h>
-#include "aufs.h"
-
-static int h_permission(struct inode *h_inode, int mask,
- struct vfsmount *h_mnt, int brperm)
-{
- int err;
- const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
-
- err = -EACCES;
- if ((write_mask && IS_IMMUTABLE(h_inode))
- || ((mask & MAY_EXEC)
- && S_ISREG(h_inode->i_mode)
- && ((h_mnt->mnt_flags & MNT_NOEXEC)
- || !(h_inode->i_mode & S_IXUGO))))
- goto out;
-
- /*
- * - skip the lower fs test in the case of write to ro branch.
- * - nfs dir permission write check is optimized, but a policy for
- * link/rename requires a real check.
- * - nfs always sets MS_POSIXACL regardless its mount option 'noacl.'
- * in this case, generic_permission() returns -EOPNOTSUPP.
- */
- if ((write_mask && !au_br_writable(brperm))
- || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
- && write_mask && !(mask & MAY_READ))
- || !h_inode->i_op->permission) {
- /* AuLabel(generic_permission); */
- /* AuDbg("get_acl %pf\n", h_inode->i_op->get_acl); */
- err = generic_permission(h_inode, mask);
- if (err == -EOPNOTSUPP && au_test_nfs_noacl(h_inode))
- err = h_inode->i_op->permission(h_inode, mask);
- AuTraceErr(err);
- } else {
- /* AuLabel(h_inode->permission); */
- err = h_inode->i_op->permission(h_inode, mask);
- AuTraceErr(err);
- }
-
- if (!err)
- err = devcgroup_inode_permission(h_inode, mask);
- if (!err)
- err = security_inode_permission(h_inode, mask);
-
-#if 0
- if (!err) {
- /* todo: do we need to call ima_path_check()? */
- struct path h_path = {
- .dentry =
- .mnt = h_mnt
- };
- err = ima_path_check(&h_path,
- mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
- IMA_COUNT_LEAVE);
- }
-#endif
-
-out:
- return err;
-}
-
-static int aufs_permission(struct inode *inode, int mask)
-{
- int err;
- aufs_bindex_t bindex, bend;
- const unsigned char isdir = !!S_ISDIR(inode->i_mode),
- write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
- struct inode *h_inode;
- struct super_block *sb;
- struct au_branch *br;
-
- /* todo: support rcu-walk? */
- if (mask & MAY_NOT_BLOCK)
- return -ECHILD;
-
- sb = inode->i_sb;
- si_read_lock(sb, AuLock_FLUSH);
- ii_read_lock_child(inode);
-#if 0
- err = au_iigen_test(inode, au_sigen(sb));
- if (unlikely(err))
- goto out;
-#endif
-
- if (!isdir
- || write_mask
- || au_opt_test(au_mntflags(sb), DIRPERM1)) {
- err = au_busy_or_stale();
- h_inode = au_h_iptr(inode, au_ibstart(inode));
- if (unlikely(!h_inode
- || (h_inode->i_mode & S_IFMT)
- != (inode->i_mode & S_IFMT)))
- goto out;
-
- err = 0;
- bindex = au_ibstart(inode);
- br = au_sbr(sb, bindex);
- err = h_permission(h_inode, mask, au_br_mnt(br), br->br_perm);
- if (write_mask
- && !err
- && !special_file(h_inode->i_mode)) {
- /* test whether the upper writable branch exists */
- err = -EROFS;
- for (; bindex >= 0; bindex--)
- if (!au_br_rdonly(au_sbr(sb, bindex))) {
- err = 0;
- break;
- }
- }
- goto out;
- }
-
- /* non-write to dir */
- err = 0;
- bend = au_ibend(inode);
- for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) {
- h_inode = au_h_iptr(inode, bindex);
- if (h_inode) {
- err = au_busy_or_stale();
- if (unlikely(!S_ISDIR(h_inode->i_mode)))
- break;
-
- br = au_sbr(sb, bindex);
- err = h_permission(h_inode, mask, au_br_mnt(br),
- br->br_perm);
- }
- }
-
-out:
- ii_read_unlock(inode);
- si_read_unlock(sb);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
- unsigned int flags)
-{
- struct dentry *ret, *parent;
- struct inode *inode;
- struct super_block *sb;
- int err, npositive;
-
- IMustLock(dir);
-
- /* todo: support rcu-walk? */
- ret = ERR_PTR(-ECHILD);
- if (flags & LOOKUP_RCU)
- goto out;
-
- ret = ERR_PTR(-ENAMETOOLONG);
- if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
- goto out;
-
- sb = dir->i_sb;
- err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
- ret = ERR_PTR(err);
- if (unlikely(err))
- goto out;
-
- err = au_di_init(dentry);
- ret = ERR_PTR(err);
- if (unlikely(err))
- goto out_si;
-
- inode = NULL;
- npositive = 0; /* suppress a warning */
- parent = dentry->d_parent; /* dir inode is locked */
- di_read_lock_parent(parent, AuLock_IR);
- err = au_alive_dir(parent);
- if (!err)
- err = au_digen_test(parent, au_sigen(sb));
- if (!err) {
- npositive = au_lkup_dentry(dentry, au_dbstart(parent),
- /*type*/0);
- err = npositive;
- }
- di_read_unlock(parent, AuLock_IR);
- ret = ERR_PTR(err);
- if (unlikely(err < 0))
- goto out_unlock;
-
- if (npositive) {
- inode = au_new_inode(dentry, /*must_new*/0);
- if (IS_ERR(inode)) {
- ret = (void *)inode;
- inode = NULL;
- goto out_unlock;
- }
- }
-
- if (inode)
- atomic_inc(&inode->i_count);
- ret = d_splice_alias(inode, dentry);
-#if 0
- if (unlikely(d_need_lookup(dentry))) {
- spin_lock(&dentry->d_lock);
- dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
- spin_unlock(&dentry->d_lock);
- } else
-#endif
- if (inode) {
- if (!IS_ERR(ret)) {
- iput(inode);
- if (ret && ret != dentry)
- ii_write_unlock(inode);
- } else {
- ii_write_unlock(inode);
- iput(inode);
- inode = NULL;
- }
- }
-
-out_unlock:
- di_write_unlock(dentry);
- if (inode) {
- /* verbose coding for lock class name */
- if (unlikely(S_ISLNK(inode->i_mode)))
- au_rw_class(&au_di(dentry)->di_rwsem,
- au_lc_key + AuLcSymlink_DIINFO);
- else if (unlikely(S_ISDIR(inode->i_mode)))
- au_rw_class(&au_di(dentry)->di_rwsem,
- au_lc_key + AuLcDir_DIINFO);
- else /* likely */
- au_rw_class(&au_di(dentry)->di_rwsem,
- au_lc_key + AuLcNonDir_DIINFO);
- }
-out_si:
- si_read_unlock(sb);
-out:
- return ret;
-}
-
-/* ---------------------------------------------------------------------- */
-
-struct aopen_node {
- struct hlist_node hlist;
- struct file *file, *h_file;
-};
-
-static int au_do_aopen(struct inode *inode, struct file *file)
-{
- struct au_sphlhead *aopen;
- struct aopen_node *node;
- struct au_do_open_args args = {
- .no_lock = 1,
- .open = au_do_open_nondir
- };
-
- aopen = &au_sbi(inode->i_sb)->si_aopen;
- spin_lock(&aopen->spin);
- hlist_for_each_entry(node, &aopen->head, hlist)
- if (node->file == file) {
- args.h_file = node->h_file;
- break;
- }
- spin_unlock(&aopen->spin);
- /* AuDebugOn(!args.h_file); */
-
- return au_do_open(file, &args);
-}
-
-static int aufs_atomic_open(struct inode *dir, struct dentry *dentry,
- struct file *file, unsigned int open_flag,
- umode_t create_mode, int *opened)
-{
- int err, h_opened = *opened;
- struct dentry *parent;
- struct dentry *d;
- struct au_sphlhead *aopen;
- struct vfsub_aopen_args args = {
- .open_flag = open_flag,
- .create_mode = create_mode,
- .opened = &h_opened
- };
- struct aopen_node aopen_node = {
- .file = file
- };
-
- IMustLock(dir);
- AuDbg("open_flag 0x%x\n", open_flag);
- AuDbgDentry(dentry);
-
- err = 0;
- if (!au_di(dentry)) {
- d = aufs_lookup(dir, dentry, /*flags*/0);
- if (IS_ERR(d)) {
- err = PTR_ERR(d);
- goto out;
- } else if (d) {
- /*
- * obsoleted dentry found.
- * another error will be returned later.
- */
- d_drop(d);
- dput(d);
- AuDbgDentry(d);
- }
- AuDbgDentry(dentry);
- }
-
- if (d_is_positive(dentry)
- || d_unhashed(dentry)
- || d_unlinked(dentry)
- || !(open_flag & O_CREAT))
- goto out_no_open;
-
- err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
- if (unlikely(err))
- goto out;
-
- parent = dentry->d_parent; /* dir is locked */
- di_write_lock_parent(parent);
- err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0);
- if (unlikely(err))
- goto out_unlock;
-
- AuDbgDentry(dentry);
- if (d_is_positive(dentry))
- goto out_unlock;
-
- args.file = get_empty_filp();
- err = PTR_ERR(args.file);
- if (IS_ERR(args.file))
- goto out_unlock;
-
- args.file->f_flags = file->f_flags;
- err = au_aopen_or_create(dir, dentry, &args);
- AuTraceErr(err);
- AuDbgFile(args.file);
- if (unlikely(err < 0)) {
- if (h_opened & FILE_OPENED)
- fput(args.file);
- else
- put_filp(args.file);
- goto out_unlock;
- }
-
- /* some filesystems don't set FILE_CREATED while succeeded? */
- *opened |= FILE_CREATED;
- if (h_opened & FILE_OPENED)
- aopen_node.h_file = args.file;
- else {
- put_filp(args.file);
- args.file = NULL;
- }
- aopen = &au_sbi(dir->i_sb)->si_aopen;
- au_sphl_add(&aopen_node.hlist, aopen);
- err = finish_open(file, dentry, au_do_aopen, opened);
- au_sphl_del(&aopen_node.hlist, aopen);
- AuTraceErr(err);
- AuDbgFile(file);
- if (aopen_node.h_file)
- fput(aopen_node.h_file);
-
-out_unlock:
- di_write_unlock(parent);
- aufs_read_unlock(dentry, AuLock_DW);
- AuDbgDentry(dentry);
- if (unlikely(err))
- goto out;
-out_no_open:
- if (!err && !(*opened & FILE_CREATED)) {
- AuLabel(out_no_open);
- dget(dentry);
- err = finish_no_open(file, dentry);
- }
-out:
- AuDbg("%pd%s%s\n", dentry,
- (*opened & FILE_CREATED) ? " created" : "",
- (*opened & FILE_OPENED) ? " opened" : "");
- AuTraceErr(err);
- return err;
-}
-
-
-/* ---------------------------------------------------------------------- */
-
-static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
- const unsigned char add_entry, aufs_bindex_t bcpup,
- aufs_bindex_t bstart)
-{
- int err;
- struct dentry *h_parent;
- struct inode *h_dir;
-
- if (add_entry)
- IMustLock(d_inode(parent));
- else
- di_write_lock_parent(parent);
-
- err = 0;
- if (!au_h_dptr(parent, bcpup)) {
- if (bstart > bcpup)
- err = au_cpup_dirs(dentry, bcpup);
- else if (bstart < bcpup)
- err = au_cpdown_dirs(dentry, bcpup);
- else
- BUG();
- }
- if (!err && add_entry && !au_ftest_wrdir(add_entry, TMPFILE)) {
- h_parent = au_h_dptr(parent, bcpup);
- h_dir = d_inode(h_parent);
- mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
- err = au_lkup_neg(dentry, bcpup, /*wh*/0);
- /* todo: no unlock here */
- mutex_unlock(&h_dir->i_mutex);
-
- AuDbg("bcpup %d\n", bcpup);
- if (!err) {
- if (d_really_is_negative(dentry))
- au_set_h_dptr(dentry, bstart, NULL);
- au_update_dbrange(dentry, /*do_put_zero*/0);
- }
- }
-
- if (!add_entry)
- di_write_unlock(parent);
- if (!err)
- err = bcpup; /* success */
-
- AuTraceErr(err);
- return err;
-}
-
-/*
- * decide the branch and the parent dir where we will create a new entry.
- * returns new bindex or an error.
- * copyup the parent dir if needed.
- */
-int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
- struct au_wr_dir_args *args)
-{
- int err;
- unsigned int flags;
- aufs_bindex_t bcpup, bstart, src_bstart;
- const unsigned char add_entry
- = au_ftest_wrdir(args->flags, ADD_ENTRY)
- | au_ftest_wrdir(args->flags, TMPFILE);
- struct super_block *sb;
- struct dentry *parent;
- struct au_sbinfo *sbinfo;
-
- sb = dentry->d_sb;
- sbinfo = au_sbi(sb);
- parent = dget_parent(dentry);
- bstart = au_dbstart(dentry);
- bcpup = bstart;
- if (args->force_btgt < 0) {
- if (src_dentry) {
- src_bstart = au_dbstart(src_dentry);
- if (src_bstart < bstart)
- bcpup = src_bstart;
- } else if (add_entry) {
- flags = 0;
- if (au_ftest_wrdir(args->flags, ISDIR))
- au_fset_wbr(flags, DIR);
- err = AuWbrCreate(sbinfo, dentry, flags);
- bcpup = err;
- }
-
- if (bcpup < 0 || au_test_ro(sb, bcpup, d_inode(dentry))) {
- if (add_entry)
- err = AuWbrCopyup(sbinfo, dentry);
- else {
- if (!IS_ROOT(dentry)) {
- di_read_lock_parent(parent, !AuLock_IR);
- err = AuWbrCopyup(sbinfo, dentry);
- di_read_unlock(parent, !AuLock_IR);
- } else
- err = AuWbrCopyup(sbinfo, dentry);
- }
- bcpup = err;
- if (unlikely(err < 0))
- goto out;
- }
- } else {
- bcpup = args->force_btgt;
- AuDebugOn(au_test_ro(sb, bcpup, d_inode(dentry)));
- }
-
- AuDbg("bstart %d, bcpup %d\n", bstart, bcpup);
- err = bcpup;
- if (bcpup == bstart)
- goto out; /* success */
-
- /* copyup the new parent into the branch we process */
- err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart);
- if (err >= 0) {
- if (d_really_is_negative(dentry)) {
- au_set_h_dptr(dentry, bstart, NULL);
- au_set_dbstart(dentry, bcpup);
- au_set_dbend(dentry, bcpup);
- }
- AuDebugOn(add_entry
- && !au_ftest_wrdir(args->flags, TMPFILE)
- && !au_h_dptr(dentry, bcpup));
- }
-
-out:
- dput(parent);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void au_pin_hdir_unlock(struct au_pin *p)
-{
- if (p->hdir)
- au_hn_imtx_unlock(p->hdir);
-}
-
-int au_pin_hdir_lock(struct au_pin *p)
-{
- int err;
-
- err = 0;
- if (!p->hdir)
- goto out;
-
- /* even if an error happens later, keep this lock */
- au_hn_imtx_lock_nested(p->hdir, p->lsc_hi);
-
- err = -EBUSY;
- if (unlikely(p->hdir->hi_inode != d_inode(p->h_parent)))
- goto out;
-
- err = 0;
- if (p->h_dentry)
- err = au_h_verify(p->h_dentry, p->udba, p->hdir->hi_inode,
- p->h_parent, p->br);
-
-out:
- return err;
-}
-
-int au_pin_hdir_relock(struct au_pin *p)
-{
- int err, i;
- struct inode *h_i;
- struct dentry *h_d[] = {
- p->h_dentry,
- p->h_parent
- };
-
- err = au_pin_hdir_lock(p);
- if (unlikely(err))
- goto out;
-
- for (i = 0; !err && i < sizeof(h_d)/sizeof(*h_d); i++) {
- if (!h_d[i])
- continue;
- if (d_is_positive(h_d[i])) {
- h_i = d_inode(h_d[i]);
- err = !h_i->i_nlink;
- }
- }
-
-out:
- return err;
-}
-
-void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task)
-{
-#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
- p->hdir->hi_inode->i_mutex.owner = task;
-#endif
-}
-
-void au_pin_hdir_acquire_nest(struct au_pin *p)
-{
- if (p->hdir) {
- mutex_acquire_nest(&p->hdir->hi_inode->i_mutex.dep_map,
- p->lsc_hi, 0, NULL, _RET_IP_);
- au_pin_hdir_set_owner(p, current);
- }
-}
-
-void au_pin_hdir_release(struct au_pin *p)
-{
- if (p->hdir) {
- au_pin_hdir_set_owner(p, p->task);
- mutex_release(&p->hdir->hi_inode->i_mutex.dep_map, 1, _RET_IP_);
- }
-}
-
-struct dentry *au_pinned_h_parent(struct au_pin *pin)
-{
- if (pin && pin->parent)
- return au_h_dptr(pin->parent, pin->bindex);
- return NULL;
-}
-
-void au_unpin(struct au_pin *p)
-{
- if (p->hdir)
- au_pin_hdir_unlock(p);
- if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE))
- vfsub_mnt_drop_write(p->h_mnt);
- if (!p->hdir)
- return;
-
- if (!au_ftest_pin(p->flags, DI_LOCKED))
- di_read_unlock(p->parent, AuLock_IR);
- iput(p->hdir->hi_inode);
- dput(p->parent);
- p->parent = NULL;
- p->hdir = NULL;
- p->h_mnt = NULL;
- /* do not clear p->task */
-}
-
-int au_do_pin(struct au_pin *p)
-{
- int err;
- struct super_block *sb;
- struct inode *h_dir;
-
- err = 0;
- sb = p->dentry->d_sb;
- p->br = au_sbr(sb, p->bindex);
- if (IS_ROOT(p->dentry)) {
- if (au_ftest_pin(p->flags, MNT_WRITE)) {
- p->h_mnt = au_br_mnt(p->br);
- err = vfsub_mnt_want_write(p->h_mnt);
- if (unlikely(err)) {
- au_fclr_pin(p->flags, MNT_WRITE);
- goto out_err;
- }
- }
- goto out;
- }
-
- p->h_dentry = NULL;
- if (p->bindex <= au_dbend(p->dentry))
- p->h_dentry = au_h_dptr(p->dentry, p->bindex);
-
- p->parent = dget_parent(p->dentry);
- if (!au_ftest_pin(p->flags, DI_LOCKED))
- di_read_lock(p->parent, AuLock_IR, p->lsc_di);
-
- h_dir = NULL;
- p->h_parent = au_h_dptr(p->parent, p->bindex);
- p->hdir = au_hi(d_inode(p->parent), p->bindex);
- if (p->hdir)
- h_dir = p->hdir->hi_inode;
-
- /*
- * udba case, or
- * if DI_LOCKED is not set, then p->parent may be different
- * and h_parent can be NULL.
- */
- if (unlikely(!p->hdir || !h_dir || !p->h_parent)) {
- err = -EBUSY;
- if (!au_ftest_pin(p->flags, DI_LOCKED))
- di_read_unlock(p->parent, AuLock_IR);
- dput(p->parent);
- p->parent = NULL;
- goto out_err;
- }
-
- if (au_ftest_pin(p->flags, MNT_WRITE)) {
- p->h_mnt = au_br_mnt(p->br);
- err = vfsub_mnt_want_write(p->h_mnt);
- if (unlikely(err)) {
- au_fclr_pin(p->flags, MNT_WRITE);
- if (!au_ftest_pin(p->flags, DI_LOCKED))
- di_read_unlock(p->parent, AuLock_IR);
- dput(p->parent);
- p->parent = NULL;
- goto out_err;
- }
- }
-
- au_igrab(h_dir);
- err = au_pin_hdir_lock(p);
- if (!err)
- goto out; /* success */
-
- au_unpin(p);
-
-out_err:
- pr_err("err %d\n", err);
- err = au_busy_or_stale();
-out:
- return err;
-}
-
-void au_pin_init(struct au_pin *p, struct dentry *dentry,
- aufs_bindex_t bindex, int lsc_di, int lsc_hi,
- unsigned int udba, unsigned char flags)
-{
- p->dentry = dentry;
- p->udba = udba;
- p->lsc_di = lsc_di;
- p->lsc_hi = lsc_hi;
- p->flags = flags;
- p->bindex = bindex;
-
- p->parent = NULL;
- p->hdir = NULL;
- p->h_mnt = NULL;
-
- p->h_dentry = NULL;
- p->h_parent = NULL;
- p->br = NULL;
- p->task = current;
-}
-
-int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
- unsigned int udba, unsigned char flags)
-{
- au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
- udba, flags);
- return au_do_pin(pin);
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * ->setattr() and ->getattr() are called in various cases.
- * chmod, stat: dentry is revalidated.
- * fchmod, fstat: file and dentry are not revalidated, additionally they may be
- * unhashed.
- * for ->setattr(), ia->ia_file is passed from ftruncate only.
- */
-/* todo: consolidate with do_refresh() and simple_reval_dpath() */
-int au_reval_for_attr(struct dentry *dentry, unsigned int sigen)
-{
- int err;
- struct dentry *parent;
-
- err = 0;
- if (au_digen_test(dentry, sigen)) {
- parent = dget_parent(dentry);
- di_read_lock_parent(parent, AuLock_IR);
- err = au_refresh_dentry(dentry, parent);
- di_read_unlock(parent, AuLock_IR);
- dput(parent);
- }
-
- AuTraceErr(err);
- return err;
-}
-
-int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
- struct au_icpup_args *a)
-{
- int err;
- loff_t sz;
- aufs_bindex_t bstart, ibstart;
- struct dentry *hi_wh, *parent;
- struct inode *inode;
- struct au_wr_dir_args wr_dir_args = {
- .force_btgt = -1,
- .flags = 0
- };
-
- if (d_is_dir(dentry))
- au_fset_wrdir(wr_dir_args.flags, ISDIR);
- /* plink or hi_wh() case */
- bstart = au_dbstart(dentry);
- inode = d_inode(dentry);
- ibstart = au_ibstart(inode);
- if (bstart != ibstart && !au_test_ro(inode->i_sb, ibstart, inode))
- wr_dir_args.force_btgt = ibstart;
- err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
- if (unlikely(err < 0))
- goto out;
- a->btgt = err;
- if (err != bstart)
- au_fset_icpup(a->flags, DID_CPUP);
-
- err = 0;
- a->pin_flags = AuPin_MNT_WRITE;
- parent = NULL;
- if (!IS_ROOT(dentry)) {
- au_fset_pin(a->pin_flags, DI_LOCKED);
- parent = dget_parent(dentry);
- di_write_lock_parent(parent);
- }
-
- err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags);
- if (unlikely(err))
- goto out_parent;
-
- a->h_path.dentry = au_h_dptr(dentry, bstart);
- sz = -1;
- a->h_inode = d_inode(a->h_path.dentry);
- if (ia && (ia->ia_valid & ATTR_SIZE)) {
- mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
- if (ia->ia_size < i_size_read(a->h_inode))
- sz = ia->ia_size;
- mutex_unlock(&a->h_inode->i_mutex);
- }
-
- hi_wh = NULL;
- if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) {
- hi_wh = au_hi_wh(inode, a->btgt);
- if (!hi_wh) {
- struct au_cp_generic cpg = {
- .dentry = dentry,
- .bdst = a->btgt,
- .bsrc = -1,
- .len = sz,
- .pin = &a->pin
- };
- err = au_sio_cpup_wh(&cpg, /*file*/NULL);
- if (unlikely(err))
- goto out_unlock;
- hi_wh = au_hi_wh(inode, a->btgt);
- /* todo: revalidate hi_wh? */
- }
- }
-
- if (parent) {
- au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
- di_downgrade_lock(parent, AuLock_IR);
- dput(parent);
- parent = NULL;
- }
- if (!au_ftest_icpup(a->flags, DID_CPUP))
- goto out; /* success */
-
- if (!d_unhashed(dentry)) {
- struct au_cp_generic cpg = {
- .dentry = dentry,
- .bdst = a->btgt,
- .bsrc = bstart,
- .len = sz,
- .pin = &a->pin,
- .flags = AuCpup_DTIME | AuCpup_HOPEN
- };
- err = au_sio_cpup_simple(&cpg);
- if (!err)
- a->h_path.dentry = au_h_dptr(dentry, a->btgt);
- } else if (!hi_wh)
- a->h_path.dentry = au_h_dptr(dentry, a->btgt);
- else
- a->h_path.dentry = hi_wh; /* do not dget here */
-
-out_unlock:
- a->h_inode = d_inode(a->h_path.dentry);
- if (!err)
- goto out; /* success */
- au_unpin(&a->pin);
-out_parent:
- if (parent) {
- di_write_unlock(parent);
- dput(parent);
- }
-out:
- if (!err)
- mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
- return err;
-}
-
-static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
-{
- int err;
- struct inode *inode, *delegated;
- struct super_block *sb;
- struct file *file;
- struct au_icpup_args *a;
-
- inode = d_inode(dentry);
- IMustLock(inode);
-
- err = -ENOMEM;
- a = kzalloc(sizeof(*a), GFP_NOFS);
- if (unlikely(!a))
- goto out;
-
- if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
- ia->ia_valid &= ~ATTR_MODE;
-
- file = NULL;
- sb = dentry->d_sb;
- err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
- if (unlikely(err))
- goto out_kfree;
-
- if (ia->ia_valid & ATTR_FILE) {
- /* currently ftruncate(2) only */
- AuDebugOn(!d_is_reg(dentry));
- file = ia->ia_file;
- err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
- if (unlikely(err))
- goto out_si;
- ia->ia_file = au_hf_top(file);
- a->udba = AuOpt_UDBA_NONE;
- } else {
- /* fchmod() doesn't pass ia_file */
- a->udba = au_opt_udba(sb);
- di_write_lock_child(dentry);
- /* no d_unlinked(), to set UDBA_NONE for root */
- if (d_unhashed(dentry))
- a->udba = AuOpt_UDBA_NONE;
- if (a->udba != AuOpt_UDBA_NONE) {
- AuDebugOn(IS_ROOT(dentry));
- err = au_reval_for_attr(dentry, au_sigen(sb));
- if (unlikely(err))
- goto out_dentry;
- }
- }
-
- err = au_pin_and_icpup(dentry, ia, a);
- if (unlikely(err < 0))
- goto out_dentry;
- if (au_ftest_icpup(a->flags, DID_CPUP)) {
- ia->ia_file = NULL;
- ia->ia_valid &= ~ATTR_FILE;
- }
-
- a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
- if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME))
- == (ATTR_MODE | ATTR_CTIME)) {
- err = security_path_chmod(&a->h_path, ia->ia_mode);
- if (unlikely(err))
- goto out_unlock;
- } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID))
- && (ia->ia_valid & ATTR_CTIME)) {
- err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid);
- if (unlikely(err))
- goto out_unlock;
- }
-
- if (ia->ia_valid & ATTR_SIZE) {
- struct file *f;
-
- if (ia->ia_size < i_size_read(inode))
- /* unmap only */
- truncate_setsize(inode, ia->ia_size);
-
- f = NULL;
- if (ia->ia_valid & ATTR_FILE)
- f = ia->ia_file;
- mutex_unlock(&a->h_inode->i_mutex);
- err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
- mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
- } else {
- delegated = NULL;
- while (1) {
- err = vfsub_notify_change(&a->h_path, ia, &delegated);
- if (delegated) {
- err = break_deleg_wait(&delegated);
- if (!err)
- continue;
- }
- break;
- }
- }
- /*
- * regardless aufs 'acl' option setting.
- * why don't all acl-aware fs call this func from their ->setattr()?
- */
- if (!err && (ia->ia_valid & ATTR_MODE))
- err = vfsub_acl_chmod(a->h_inode, ia->ia_mode);
- if (!err)
- au_cpup_attr_changeable(inode);
-
-out_unlock:
- mutex_unlock(&a->h_inode->i_mutex);
- au_unpin(&a->pin);
- if (unlikely(err))
- au_update_dbstart(dentry);
-out_dentry:
- di_write_unlock(dentry);
- if (file) {
- fi_write_unlock(file);
- ia->ia_file = file;
- ia->ia_valid |= ATTR_FILE;
- }
-out_si:
- si_read_unlock(sb);
-out_kfree:
- kfree(a);
-out:
- AuTraceErr(err);
- return err;
-}
-
-#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
-static int au_h_path_to_set_attr(struct dentry *dentry,
- struct au_icpup_args *a, struct path *h_path)
-{
- int err;
- struct super_block *sb;
-
- sb = dentry->d_sb;
- a->udba = au_opt_udba(sb);
- /* no d_unlinked(), to set UDBA_NONE for root */
- if (d_unhashed(dentry))
- a->udba = AuOpt_UDBA_NONE;
- if (a->udba != AuOpt_UDBA_NONE) {
- AuDebugOn(IS_ROOT(dentry));
- err = au_reval_for_attr(dentry, au_sigen(sb));
- if (unlikely(err))
- goto out;
- }
- err = au_pin_and_icpup(dentry, /*ia*/NULL, a);
- if (unlikely(err < 0))
- goto out;
-
- h_path->dentry = a->h_path.dentry;
- h_path->mnt = au_sbr_mnt(sb, a->btgt);
-
-out:
- return err;
-}
-
-ssize_t au_srxattr(struct dentry *dentry, struct au_srxattr *arg)
-{
- int err;
- struct path h_path;
- struct super_block *sb;
- struct au_icpup_args *a;
- struct inode *inode, *h_inode;
-
- inode = d_inode(dentry);
- IMustLock(inode);
-
- err = -ENOMEM;
- a = kzalloc(sizeof(*a), GFP_NOFS);
- if (unlikely(!a))
- goto out;
-
- sb = dentry->d_sb;
- err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
- if (unlikely(err))
- goto out_kfree;
-
- h_path.dentry = NULL; /* silence gcc */
- di_write_lock_child(dentry);
- err = au_h_path_to_set_attr(dentry, a, &h_path);
- if (unlikely(err))
- goto out_di;
-
- mutex_unlock(&a->h_inode->i_mutex);
- switch (arg->type) {
- case AU_XATTR_SET:
- err = vfsub_setxattr(h_path.dentry,
- arg->u.set.name, arg->u.set.value,
- arg->u.set.size, arg->u.set.flags);
- break;
- case AU_XATTR_REMOVE:
- err = vfsub_removexattr(h_path.dentry, arg->u.remove.name);
- break;
- case AU_ACL_SET:
- err = -EOPNOTSUPP;
- h_inode = d_inode(h_path.dentry);
- if (h_inode->i_op->set_acl)
- err = h_inode->i_op->set_acl(h_inode,
- arg->u.acl_set.acl,
- arg->u.acl_set.type);
- break;
- }
- if (!err)
- au_cpup_attr_timesizes(inode);
-
- au_unpin(&a->pin);
- if (unlikely(err))
- au_update_dbstart(dentry);
-
-out_di:
- di_write_unlock(dentry);
- si_read_unlock(sb);
-out_kfree:
- kfree(a);
-out:
- AuTraceErr(err);
- return err;
-}
-#endif
-
-static void au_refresh_iattr(struct inode *inode, struct kstat *st,
- unsigned int nlink)
-{
- unsigned int n;
-
- inode->i_mode = st->mode;
- /* don't i_[ug]id_write() here */
- inode->i_uid = st->uid;
- inode->i_gid = st->gid;
- inode->i_atime = st->atime;
- inode->i_mtime = st->mtime;
- inode->i_ctime = st->ctime;
-
- au_cpup_attr_nlink(inode, /*force*/0);
- if (S_ISDIR(inode->i_mode)) {
- n = inode->i_nlink;
- n -= nlink;
- n += st->nlink;
- smp_mb(); /* for i_nlink */
- /* 0 can happen */
- set_nlink(inode, n);
- }
-
- spin_lock(&inode->i_lock);
- inode->i_blocks = st->blocks;
- i_size_write(inode, st->size);
- spin_unlock(&inode->i_lock);
-}
-
-/*
- * common routine for aufs_getattr() and aufs_getxattr().
- * returns zero or negative (an error).
- * @dentry will be read-locked in success.
- */
-int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path)
-{
- int err;
- unsigned int mnt_flags, sigen;
- unsigned char udba_none;
- aufs_bindex_t bindex;
- struct super_block *sb, *h_sb;
- struct inode *inode;
-
- h_path->mnt = NULL;
- h_path->dentry = NULL;
-
- err = 0;
- sb = dentry->d_sb;
- mnt_flags = au_mntflags(sb);
- udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
-
- /* support fstat(2) */
- if (!d_unlinked(dentry) && !udba_none) {
- sigen = au_sigen(sb);
- err = au_digen_test(dentry, sigen);
- if (!err) {
- di_read_lock_child(dentry, AuLock_IR);
- err = au_dbrange_test(dentry);
- if (unlikely(err)) {
- di_read_unlock(dentry, AuLock_IR);
- goto out;
- }
- } else {
- AuDebugOn(IS_ROOT(dentry));
- di_write_lock_child(dentry);
- err = au_dbrange_test(dentry);
- if (!err)
- err = au_reval_for_attr(dentry, sigen);
- if (!err)
- di_downgrade_lock(dentry, AuLock_IR);
- else {
- di_write_unlock(dentry);
- goto out;
- }
- }
- } else
- di_read_lock_child(dentry, AuLock_IR);
-
- inode = d_inode(dentry);
- bindex = au_ibstart(inode);
- h_path->mnt = au_sbr_mnt(sb, bindex);
- h_sb = h_path->mnt->mnt_sb;
- if (!force
- && !au_test_fs_bad_iattr(h_sb)
- && udba_none)
- goto out; /* success */
-
- if (au_dbstart(dentry) == bindex)
- h_path->dentry = au_h_dptr(dentry, bindex);
- else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
- h_path->dentry = au_plink_lkup(inode, bindex);
- if (IS_ERR(h_path->dentry))
- /* pretending success */
- h_path->dentry = NULL;
- else
- dput(h_path->dentry);
- }
-
-out:
- return err;
-}
-
-static int aufs_getattr(struct vfsmount *mnt __maybe_unused,
- struct dentry *dentry, struct kstat *st)
-{
- int err;
- unsigned char positive;
- struct path h_path;
- struct inode *inode;
- struct super_block *sb;
-
- inode = d_inode(dentry);
- sb = dentry->d_sb;
- err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
- if (unlikely(err))
- goto out;
- err = au_h_path_getattr(dentry, /*force*/0, &h_path);
- if (unlikely(err))
- goto out_si;
- if (unlikely(!h_path.dentry))
- /* illegally overlapped or something */
- goto out_fill; /* pretending success */
-
- positive = d_is_positive(h_path.dentry);
- if (positive)
- err = vfs_getattr(&h_path, st);
- if (!err) {
- if (positive)
- au_refresh_iattr(inode, st,
- d_inode(h_path.dentry)->i_nlink);
- goto out_fill; /* success */
- }
- AuTraceErr(err);
- goto out_di;
-
-out_fill:
- generic_fillattr(inode, st);
-out_di:
- di_read_unlock(dentry, AuLock_IR);
-out_si:
- si_read_unlock(sb);
-out:
- AuTraceErr(err);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * Assumption:
- * - the number of symlinks is not so many.
- *
- * Structure:
- * - sbinfo (instead of iinfo) contains an hlist of struct au_symlink.
- * If iinfo contained the hlist, then it would be rather large waste of memory
- * I am afraid.
- * - struct au_symlink contains the necessary info for h_inode follow_link() and
- * put_link().
- */
-
-struct au_symlink {
- union {
- struct hlist_node hlist;
- struct rcu_head rcu;
- };
-
- struct inode *h_inode;
- void *h_cookie;
-};
-
-static void au_symlink_add(struct super_block *sb, struct au_symlink *slink,
- struct inode *h_inode, void *cookie)
-{
- struct au_sbinfo *sbinfo;
-
- ihold(h_inode);
- slink->h_inode = h_inode;
- slink->h_cookie = cookie;
- sbinfo = au_sbi(sb);
- au_sphl_add(&slink->hlist, &sbinfo->si_symlink);
-}
-
-static void au_symlink_del(struct super_block *sb, struct au_symlink *slink)
-{
- struct au_sbinfo *sbinfo;
-
- /* do not iput() within rcu */
- iput(slink->h_inode);
- slink->h_inode = NULL;
- sbinfo = au_sbi(sb);
- au_sphl_del_rcu(&slink->hlist, &sbinfo->si_symlink);
- kfree_rcu(slink, rcu);
-}
-
-static const char *aufs_follow_link(struct dentry *dentry, void **cookie)
-{
- const char *ret;
- struct inode *inode, *h_inode;
- struct dentry *h_dentry;
- struct au_symlink *slink;
- int err;
- aufs_bindex_t bindex;
-
- ret = NULL; /* suppress a warning */
- err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
- if (unlikely(err))
- goto out;
-
- err = au_d_hashed_positive(dentry);
- if (unlikely(err))
- goto out_unlock;
-
- err = -EINVAL;
- inode = d_inode(dentry);
- bindex = au_ibstart(inode);
- h_inode = au_h_iptr(inode, bindex);
- if (unlikely(!h_inode->i_op->follow_link))
- goto out_unlock;
-
- err = -ENOMEM;
- slink = kmalloc(sizeof(*slink), GFP_NOFS);
- if (unlikely(!slink))
- goto out_unlock;
-
- err = -EBUSY;
- h_dentry = NULL;
- if (au_dbstart(dentry) <= bindex) {
- h_dentry = au_h_dptr(dentry, bindex);
- if (h_dentry)
- dget(h_dentry);
- }
- if (!h_dentry) {
- h_dentry = d_find_any_alias(h_inode);
- if (IS_ERR(h_dentry)) {
- err = PTR_ERR(h_dentry);
- goto out_free;
- }
- }
- if (unlikely(!h_dentry))
- goto out_free;
-
- err = 0;
- AuDbg("%pf\n", h_inode->i_op->follow_link);
- AuDbgDentry(h_dentry);
- ret = h_inode->i_op->follow_link(h_dentry, cookie);
- dput(h_dentry);
-
- if (!IS_ERR_OR_NULL(ret)) {
- au_symlink_add(inode->i_sb, slink, h_inode, *cookie);
- *cookie = slink;
- AuDbg("slink %p\n", slink);
- goto out_unlock; /* success */
- }
-
-out_free:
- slink->h_inode = NULL;
- kfree_rcu(slink, rcu);
-out_unlock:
- aufs_read_unlock(dentry, AuLock_IR);
-out:
- if (unlikely(err))
- ret = ERR_PTR(err);
- AuTraceErrPtr(ret);
- return ret;
-}
-
-static void aufs_put_link(struct inode *inode, void *cookie)
-{
- struct au_symlink *slink;
- struct inode *h_inode;
-
- slink = cookie;
- AuDbg("slink %p\n", slink);
- h_inode = slink->h_inode;
- AuDbg("%pf\n", h_inode->i_op->put_link);
- AuDbgInode(h_inode);
- if (h_inode->i_op->put_link)
- h_inode->i_op->put_link(h_inode, slink->h_cookie);
- au_symlink_del(inode->i_sb, slink);
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int aufs_update_time(struct inode *inode, struct timespec *ts, int flags)
-{
- int err;
- struct super_block *sb;
- struct inode *h_inode;
-
- sb = inode->i_sb;
- /* mmap_sem might be acquired already, cf. aufs_mmap() */
- lockdep_off();
- si_read_lock(sb, AuLock_FLUSH);
- ii_write_lock_child(inode);
- lockdep_on();
- h_inode = au_h_iptr(inode, au_ibstart(inode));
- err = vfsub_update_time(h_inode, ts, flags);
- lockdep_off();
- if (!err)
- au_cpup_attr_timesizes(inode);
- ii_write_unlock(inode);
- si_read_unlock(sb);
- lockdep_on();
-
- if (!err && (flags & S_VERSION))
- inode_inc_iversion(inode);
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* no getattr version will be set by module.c:aufs_init() */
-struct inode_operations aufs_iop_nogetattr[AuIop_Last],
- aufs_iop[] = {
- [AuIop_SYMLINK] = {
- .permission = aufs_permission,
-#ifdef CONFIG_FS_POSIX_ACL
- .get_acl = aufs_get_acl,
- .set_acl = aufs_set_acl, /* unsupport for symlink? */
-#endif
-
- .setattr = aufs_setattr,
- .getattr = aufs_getattr,
-
-#ifdef CONFIG_AUFS_XATTR
- .setxattr = aufs_setxattr,
- .getxattr = aufs_getxattr,
- .listxattr = aufs_listxattr,
- .removexattr = aufs_removexattr,
-#endif
-
- .readlink = generic_readlink,
- .follow_link = aufs_follow_link,
- .put_link = aufs_put_link,
-
- /* .update_time = aufs_update_time */
- },
- [AuIop_DIR] = {
- .create = aufs_create,
- .lookup = aufs_lookup,
- .link = aufs_link,
- .unlink = aufs_unlink,
- .symlink = aufs_symlink,
- .mkdir = aufs_mkdir,
- .rmdir = aufs_rmdir,
- .mknod = aufs_mknod,
- .rename = aufs_rename,
-
- .permission = aufs_permission,
-#ifdef CONFIG_FS_POSIX_ACL
- .get_acl = aufs_get_acl,
- .set_acl = aufs_set_acl,
-#endif
-
- .setattr = aufs_setattr,
- .getattr = aufs_getattr,
-
-#ifdef CONFIG_AUFS_XATTR
- .setxattr = aufs_setxattr,
- .getxattr = aufs_getxattr,
- .listxattr = aufs_listxattr,
- .removexattr = aufs_removexattr,
-#endif
-
- .update_time = aufs_update_time,
- .atomic_open = aufs_atomic_open,
- .tmpfile = aufs_tmpfile
- },
- [AuIop_OTHER] = {
- .permission = aufs_permission,
-#ifdef CONFIG_FS_POSIX_ACL
- .get_acl = aufs_get_acl,
- .set_acl = aufs_set_acl,
-#endif
-
- .setattr = aufs_setattr,
- .getattr = aufs_getattr,
-
-#ifdef CONFIG_AUFS_XATTR
- .setxattr = aufs_setxattr,
- .getxattr = aufs_getxattr,
- .listxattr = aufs_listxattr,
- .removexattr = aufs_removexattr,
-#endif
-
- .update_time = aufs_update_time
- }
-};
diff --git a/fs/aufs/i_op_add.c b/fs/aufs/i_op_add.c
deleted file mode 100644
index 3fc355859..000000000
--- a/fs/aufs/i_op_add.c
+++ /dev/null
@@ -1,919 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * inode operations (add entry)
- */
-
-#include "aufs.h"
-
-/*
- * final procedure of adding a new entry, except link(2).
- * remove whiteout, instantiate, copyup the parent dir's times and size
- * and update version.
- * if it failed, re-create the removed whiteout.
- */
-static int epilog(struct inode *dir, aufs_bindex_t bindex,
- struct dentry *wh_dentry, struct dentry *dentry)
-{
- int err, rerr;
- aufs_bindex_t bwh;
- struct path h_path;
- struct super_block *sb;
- struct inode *inode, *h_dir;
- struct dentry *wh;
-
- bwh = -1;
- sb = dir->i_sb;
- if (wh_dentry) {
- h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
- IMustLock(h_dir);
- AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
- bwh = au_dbwh(dentry);
- h_path.dentry = wh_dentry;
- h_path.mnt = au_sbr_mnt(sb, bindex);
- err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
- dentry);
- if (unlikely(err))
- goto out;
- }
-
- inode = au_new_inode(dentry, /*must_new*/1);
- if (!IS_ERR(inode)) {
- d_instantiate(dentry, inode);
- dir = d_inode(dentry->d_parent); /* dir inode is locked */
- IMustLock(dir);
- au_dir_ts(dir, bindex);
- dir->i_version++;
- au_fhsm_wrote(sb, bindex, /*force*/0);
- return 0; /* success */
- }
-
- err = PTR_ERR(inode);
- if (!wh_dentry)
- goto out;
-
- /* revert */
- /* dir inode is locked */
- wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
- rerr = PTR_ERR(wh);
- if (IS_ERR(wh)) {
- AuIOErr("%pd reverting whiteout failed(%d, %d)\n",
- dentry, err, rerr);
- err = -EIO;
- } else
- dput(wh);
-
-out:
- return err;
-}
-
-static int au_d_may_add(struct dentry *dentry)
-{
- int err;
-
- err = 0;
- if (unlikely(d_unhashed(dentry)))
- err = -ENOENT;
- if (unlikely(d_really_is_positive(dentry)))
- err = -EEXIST;
- return err;
-}
-
-/*
- * simple tests for the adding inode operations.
- * following the checks in vfs, plus the parent-child relationship.
- */
-int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
- struct dentry *h_parent, int isdir)
-{
- int err;
- umode_t h_mode;
- struct dentry *h_dentry;
- struct inode *h_inode;
-
- err = -ENAMETOOLONG;
- if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
- goto out;
-
- h_dentry = au_h_dptr(dentry, bindex);
- if (d_really_is_negative(dentry)) {
- err = -EEXIST;
- if (unlikely(d_is_positive(h_dentry)))
- goto out;
- } else {
- /* rename(2) case */
- err = -EIO;
- if (unlikely(d_is_negative(h_dentry)))
- goto out;
- h_inode = d_inode(h_dentry);
- if (unlikely(!h_inode->i_nlink))
- goto out;
-
- h_mode = h_inode->i_mode;
- if (!isdir) {
- err = -EISDIR;
- if (unlikely(S_ISDIR(h_mode)))
- goto out;
- } else if (unlikely(!S_ISDIR(h_mode))) {
- err = -ENOTDIR;
- goto out;
- }
- }
-
- err = 0;
- /* expected parent dir is locked */
- if (unlikely(h_parent != h_dentry->d_parent))
- err = -EIO;
-
-out:
- AuTraceErr(err);
- return err;
-}
-
-/*
- * initial procedure of adding a new entry.
- * prepare writable branch and the parent dir, lock it,
- * and lookup whiteout for the new entry.
- */
-static struct dentry*
-lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
- struct dentry *src_dentry, struct au_pin *pin,
- struct au_wr_dir_args *wr_dir_args)
-{
- struct dentry *wh_dentry, *h_parent;
- struct super_block *sb;
- struct au_branch *br;
- int err;
- unsigned int udba;
- aufs_bindex_t bcpup;
-
- AuDbg("%pd\n", dentry);
-
- err = au_wr_dir(dentry, src_dentry, wr_dir_args);
- bcpup = err;
- wh_dentry = ERR_PTR(err);
- if (unlikely(err < 0))
- goto out;
-
- sb = dentry->d_sb;
- udba = au_opt_udba(sb);
- err = au_pin(pin, dentry, bcpup, udba,
- AuPin_DI_LOCKED | AuPin_MNT_WRITE);
- wh_dentry = ERR_PTR(err);
- if (unlikely(err))
- goto out;
-
- h_parent = au_pinned_h_parent(pin);
- if (udba != AuOpt_UDBA_NONE
- && au_dbstart(dentry) == bcpup)
- err = au_may_add(dentry, bcpup, h_parent,
- au_ftest_wrdir(wr_dir_args->flags, ISDIR));
- else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
- err = -ENAMETOOLONG;
- wh_dentry = ERR_PTR(err);
- if (unlikely(err))
- goto out_unpin;
-
- br = au_sbr(sb, bcpup);
- if (dt) {
- struct path tmp = {
- .dentry = h_parent,
- .mnt = au_br_mnt(br)
- };
- au_dtime_store(dt, au_pinned_parent(pin), &tmp);
- }
-
- wh_dentry = NULL;
- if (bcpup != au_dbwh(dentry))
- goto out; /* success */
-
- /*
- * ENAMETOOLONG here means that if we allowed create such name, then it
- * would not be able to removed in the future. So we don't allow such
- * name here and we don't handle ENAMETOOLONG differently here.
- */
- wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
-
-out_unpin:
- if (IS_ERR(wh_dentry))
- au_unpin(pin);
-out:
- return wh_dentry;
-}
-
-/* ---------------------------------------------------------------------- */
-
-enum { Mknod, Symlink, Creat };
-struct simple_arg {
- int type;
- union {
- struct {
- umode_t mode;
- bool want_excl;
- bool try_aopen;
- struct vfsub_aopen_args *aopen;
- } c;
- struct {
- const char *symname;
- } s;
- struct {
- umode_t mode;
- dev_t dev;
- } m;
- } u;
-};
-
-static int add_simple(struct inode *dir, struct dentry *dentry,
- struct simple_arg *arg)
-{
- int err, rerr;
- aufs_bindex_t bstart;
- unsigned char created;
- const unsigned char try_aopen
- = (arg->type == Creat && arg->u.c.try_aopen);
- struct dentry *wh_dentry, *parent;
- struct inode *h_dir;
- struct super_block *sb;
- struct au_branch *br;
- /* to reuduce stack size */
- struct {
- struct au_dtime dt;
- struct au_pin pin;
- struct path h_path;
- struct au_wr_dir_args wr_dir_args;
- } *a;
-
- AuDbg("%pd\n", dentry);
- IMustLock(dir);
-
- err = -ENOMEM;
- a = kmalloc(sizeof(*a), GFP_NOFS);
- if (unlikely(!a))
- goto out;
- a->wr_dir_args.force_btgt = -1;
- a->wr_dir_args.flags = AuWrDir_ADD_ENTRY;
-
- parent = dentry->d_parent; /* dir inode is locked */
- if (!try_aopen) {
- err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
- if (unlikely(err))
- goto out_free;
- }
- err = au_d_may_add(dentry);
- if (unlikely(err))
- goto out_unlock;
- if (!try_aopen)
- di_write_lock_parent(parent);
- wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
- &a->pin, &a->wr_dir_args);
- err = PTR_ERR(wh_dentry);
- if (IS_ERR(wh_dentry))
- goto out_parent;
-
- bstart = au_dbstart(dentry);
- sb = dentry->d_sb;
- br = au_sbr(sb, bstart);
- a->h_path.dentry = au_h_dptr(dentry, bstart);
- a->h_path.mnt = au_br_mnt(br);
- h_dir = au_pinned_h_dir(&a->pin);
- switch (arg->type) {
- case Creat:
- err = 0;
- if (!try_aopen || !h_dir->i_op->atomic_open)
- err = vfsub_create(h_dir, &a->h_path, arg->u.c.mode,
- arg->u.c.want_excl);
- else
- err = vfsub_atomic_open(h_dir, a->h_path.dentry,
- arg->u.c.aopen, br);
- break;
- case Symlink:
- err = vfsub_symlink(h_dir, &a->h_path, arg->u.s.symname);
- break;
- case Mknod:
- err = vfsub_mknod(h_dir, &a->h_path, arg->u.m.mode,
- arg->u.m.dev);
- break;
- default:
- BUG();
- }
- created = !err;
- if (!err)
- err = epilog(dir, bstart, wh_dentry, dentry);
-
- /* revert */
- if (unlikely(created && err && d_is_positive(a->h_path.dentry))) {
- /* no delegation since it is just created */
- rerr = vfsub_unlink(h_dir, &a->h_path, /*delegated*/NULL,
- /*force*/0);
- if (rerr) {
- AuIOErr("%pd revert failure(%d, %d)\n",
- dentry, err, rerr);
- err = -EIO;
- }
- au_dtime_revert(&a->dt);
- }
-
- if (!err && try_aopen && !h_dir->i_op->atomic_open)
- *arg->u.c.aopen->opened |= FILE_CREATED;
-
- au_unpin(&a->pin);
- dput(wh_dentry);
-
-out_parent:
- if (!try_aopen)
- di_write_unlock(parent);
-out_unlock:
- if (unlikely(err)) {
- au_update_dbstart(dentry);
- d_drop(dentry);
- }
- if (!try_aopen)
- aufs_read_unlock(dentry, AuLock_DW);
-out_free:
- kfree(a);
-out:
- return err;
-}
-
-int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
- dev_t dev)
-{
- struct simple_arg arg = {
- .type = Mknod,
- .u.m = {
- .mode = mode,
- .dev = dev
- }
- };
- return add_simple(dir, dentry, &arg);
-}
-
-int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
-{
- struct simple_arg arg = {
- .type = Symlink,
- .u.s.symname = symname
- };
- return add_simple(dir, dentry, &arg);
-}
-
-int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool want_excl)
-{
- struct simple_arg arg = {
- .type = Creat,
- .u.c = {
- .mode = mode,
- .want_excl = want_excl
- }
- };
- return add_simple(dir, dentry, &arg);
-}
-
-int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
- struct vfsub_aopen_args *aopen_args)
-{
- struct simple_arg arg = {
- .type = Creat,
- .u.c = {
- .mode = aopen_args->create_mode,
- .want_excl = aopen_args->open_flag & O_EXCL,
- .try_aopen = true,
- .aopen = aopen_args
- }
- };
- return add_simple(dir, dentry, &arg);
-}
-
-int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
-{
- int err;
- aufs_bindex_t bindex;
- struct super_block *sb;
- struct dentry *parent, *h_parent, *h_dentry;
- struct inode *h_dir, *inode;
- struct vfsmount *h_mnt;
- struct au_wr_dir_args wr_dir_args = {
- .force_btgt = -1,
- .flags = AuWrDir_TMPFILE
- };
-
- /* copy-up may happen */
- mutex_lock(&dir->i_mutex);
-
- sb = dir->i_sb;
- err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
- if (unlikely(err))
- goto out;
-
- err = au_di_init(dentry);
- if (unlikely(err))
- goto out_si;
-
- err = -EBUSY;
- parent = d_find_any_alias(dir);
- AuDebugOn(!parent);
- di_write_lock_parent(parent);
- if (unlikely(d_inode(parent) != dir))
- goto out_parent;
-
- err = au_digen_test(parent, au_sigen(sb));
- if (unlikely(err))
- goto out_parent;
-
- bindex = au_dbstart(parent);
- au_set_dbstart(dentry, bindex);
- au_set_dbend(dentry, bindex);
- err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
- bindex = err;
- if (unlikely(err < 0))
- goto out_parent;
-
- err = -EOPNOTSUPP;
- h_dir = au_h_iptr(dir, bindex);
- if (unlikely(!h_dir->i_op->tmpfile))
- goto out_parent;
-
- h_mnt = au_sbr_mnt(sb, bindex);
- err = vfsub_mnt_want_write(h_mnt);
- if (unlikely(err))
- goto out_parent;
-
- h_parent = au_h_dptr(parent, bindex);
- err = inode_permission(d_inode(h_parent), MAY_WRITE | MAY_EXEC);
- if (unlikely(err))
- goto out_mnt;
-
- err = -ENOMEM;
- h_dentry = d_alloc(h_parent, &dentry->d_name);
- if (unlikely(!h_dentry))
- goto out_mnt;
-
- err = h_dir->i_op->tmpfile(h_dir, h_dentry, mode);
- if (unlikely(err))
- goto out_dentry;
-
- au_set_dbstart(dentry, bindex);
- au_set_dbend(dentry, bindex);
- au_set_h_dptr(dentry, bindex, dget(h_dentry));
- inode = au_new_inode(dentry, /*must_new*/1);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- au_set_h_dptr(dentry, bindex, NULL);
- au_set_dbstart(dentry, -1);
- au_set_dbend(dentry, -1);
- } else {
- if (!inode->i_nlink)
- set_nlink(inode, 1);
- d_tmpfile(dentry, inode);
- au_di(dentry)->di_tmpfile = 1;
-
- /* update without i_mutex */
- if (au_ibstart(dir) == au_dbstart(dentry))
- au_cpup_attr_timesizes(dir);
- }
-
-out_dentry:
- dput(h_dentry);
-out_mnt:
- vfsub_mnt_drop_write(h_mnt);
-out_parent:
- di_write_unlock(parent);
- dput(parent);
- di_write_unlock(dentry);
- if (!err)
-#if 0
- /* verbose coding for lock class name */
- au_rw_class(&au_di(dentry)->di_rwsem,
- au_lc_key + AuLcNonDir_DIINFO);
-#else
- ;
-#endif
- else {
- au_di_fin(dentry);
- dentry->d_fsdata = NULL;
- }
-out_si:
- si_read_unlock(sb);
-out:
- mutex_unlock(&dir->i_mutex);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-struct au_link_args {
- aufs_bindex_t bdst, bsrc;
- struct au_pin pin;
- struct path h_path;
- struct dentry *src_parent, *parent;
-};
-
-static int au_cpup_before_link(struct dentry *src_dentry,
- struct au_link_args *a)
-{
- int err;
- struct dentry *h_src_dentry;
- struct au_cp_generic cpg = {
- .dentry = src_dentry,
- .bdst = a->bdst,
- .bsrc = a->bsrc,
- .len = -1,
- .pin = &a->pin,
- .flags = AuCpup_DTIME | AuCpup_HOPEN /* | AuCpup_KEEPLINO */
- };
-
- di_read_lock_parent(a->src_parent, AuLock_IR);
- err = au_test_and_cpup_dirs(src_dentry, a->bdst);
- if (unlikely(err))
- goto out;
-
- h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
- err = au_pin(&a->pin, src_dentry, a->bdst,
- au_opt_udba(src_dentry->d_sb),
- AuPin_DI_LOCKED | AuPin_MNT_WRITE);
- if (unlikely(err))
- goto out;
-
- err = au_sio_cpup_simple(&cpg);
- au_unpin(&a->pin);
-
-out:
- di_read_unlock(a->src_parent, AuLock_IR);
- return err;
-}
-
-static int au_cpup_or_link(struct dentry *src_dentry, struct dentry *dentry,
- struct au_link_args *a)
-{
- int err;
- unsigned char plink;
- aufs_bindex_t bend;
- struct dentry *h_src_dentry;
- struct inode *h_inode, *inode, *delegated;
- struct super_block *sb;
- struct file *h_file;
-
- plink = 0;
- h_inode = NULL;
- sb = src_dentry->d_sb;
- inode = d_inode(src_dentry);
- if (au_ibstart(inode) <= a->bdst)
- h_inode = au_h_iptr(inode, a->bdst);
- if (!h_inode || !h_inode->i_nlink) {
- /* copyup src_dentry as the name of dentry. */
- bend = au_dbend(dentry);
- if (bend < a->bsrc)
- au_set_dbend(dentry, a->bsrc);
- au_set_h_dptr(dentry, a->bsrc,
- dget(au_h_dptr(src_dentry, a->bsrc)));
- dget(a->h_path.dentry);
- au_set_h_dptr(dentry, a->bdst, NULL);
- AuDbg("temporary d_inode...\n");
- spin_lock(&dentry->d_lock);
- dentry->d_inode = d_inode(src_dentry); /* tmp */
- spin_unlock(&dentry->d_lock);
- h_file = au_h_open_pre(dentry, a->bsrc, /*force_wr*/0);
- if (IS_ERR(h_file))
- err = PTR_ERR(h_file);
- else {
- struct au_cp_generic cpg = {
- .dentry = dentry,
- .bdst = a->bdst,
- .bsrc = -1,
- .len = -1,
- .pin = &a->pin,
- .flags = AuCpup_KEEPLINO
- };
- err = au_sio_cpup_simple(&cpg);
- au_h_open_post(dentry, a->bsrc, h_file);
- if (!err) {
- dput(a->h_path.dentry);
- a->h_path.dentry = au_h_dptr(dentry, a->bdst);
- } else
- au_set_h_dptr(dentry, a->bdst,
- a->h_path.dentry);
- }
- spin_lock(&dentry->d_lock);
- dentry->d_inode = NULL; /* restore */
- spin_unlock(&dentry->d_lock);
- AuDbg("temporary d_inode...done\n");
- au_set_h_dptr(dentry, a->bsrc, NULL);
- au_set_dbend(dentry, bend);
- } else {
- /* the inode of src_dentry already exists on a.bdst branch */
- h_src_dentry = d_find_alias(h_inode);
- if (!h_src_dentry && au_plink_test(inode)) {
- plink = 1;
- h_src_dentry = au_plink_lkup(inode, a->bdst);
- err = PTR_ERR(h_src_dentry);
- if (IS_ERR(h_src_dentry))
- goto out;
-
- if (unlikely(d_is_negative(h_src_dentry))) {
- dput(h_src_dentry);
- h_src_dentry = NULL;
- }
-
- }
- if (h_src_dentry) {
- delegated = NULL;
- err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
- &a->h_path, &delegated);
- if (unlikely(err == -EWOULDBLOCK)) {
- pr_warn("cannot retry for NFSv4 delegation"
- " for an internal link\n");
- iput(delegated);
- }
- dput(h_src_dentry);
- } else {
- AuIOErr("no dentry found for hi%lu on b%d\n",
- h_inode->i_ino, a->bdst);
- err = -EIO;
- }
- }
-
- if (!err && !plink)
- au_plink_append(inode, a->bdst, a->h_path.dentry);
-
-out:
- AuTraceErr(err);
- return err;
-}
-
-int aufs_link(struct dentry *src_dentry, struct inode *dir,
- struct dentry *dentry)
-{
- int err, rerr;
- struct au_dtime dt;
- struct au_link_args *a;
- struct dentry *wh_dentry, *h_src_dentry;
- struct inode *inode, *delegated;
- struct super_block *sb;
- struct au_wr_dir_args wr_dir_args = {
- /* .force_btgt = -1, */
- .flags = AuWrDir_ADD_ENTRY
- };
-
- IMustLock(dir);
- inode = d_inode(src_dentry);
- IMustLock(inode);
-
- err = -ENOMEM;
- a = kzalloc(sizeof(*a), GFP_NOFS);
- if (unlikely(!a))
- goto out;
-
- a->parent = dentry->d_parent; /* dir inode is locked */
- err = aufs_read_and_write_lock2(dentry, src_dentry,
- AuLock_NOPLM | AuLock_GEN);
- if (unlikely(err))
- goto out_kfree;
- err = au_d_linkable(src_dentry);
- if (unlikely(err))
- goto out_unlock;
- err = au_d_may_add(dentry);
- if (unlikely(err))
- goto out_unlock;
-
- a->src_parent = dget_parent(src_dentry);
- wr_dir_args.force_btgt = au_ibstart(inode);
-
- di_write_lock_parent(a->parent);
- wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
- wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
- &wr_dir_args);
- err = PTR_ERR(wh_dentry);
- if (IS_ERR(wh_dentry))
- goto out_parent;
-
- err = 0;
- sb = dentry->d_sb;
- a->bdst = au_dbstart(dentry);
- a->h_path.dentry = au_h_dptr(dentry, a->bdst);
- a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
- a->bsrc = au_ibstart(inode);
- h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
- if (!h_src_dentry && au_di(src_dentry)->di_tmpfile)
- h_src_dentry = dget(au_hi_wh(inode, a->bsrc));
- if (!h_src_dentry) {
- a->bsrc = au_dbstart(src_dentry);
- h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
- AuDebugOn(!h_src_dentry);
- } else if (IS_ERR(h_src_dentry)) {
- err = PTR_ERR(h_src_dentry);
- goto out_parent;
- }
-
- if (au_opt_test(au_mntflags(sb), PLINK)) {
- if (a->bdst < a->bsrc
- /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
- err = au_cpup_or_link(src_dentry, dentry, a);
- else {
- delegated = NULL;
- err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
- &a->h_path, &delegated);
- if (unlikely(err == -EWOULDBLOCK)) {
- pr_warn("cannot retry for NFSv4 delegation"
- " for an internal link\n");
- iput(delegated);
- }
- }
- dput(h_src_dentry);
- } else {
- /*
- * copyup src_dentry to the branch we process,
- * and then link(2) to it.
- */
- dput(h_src_dentry);
- if (a->bdst < a->bsrc
- /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
- au_unpin(&a->pin);
- di_write_unlock(a->parent);
- err = au_cpup_before_link(src_dentry, a);
- di_write_lock_parent(a->parent);
- if (!err)
- err = au_pin(&a->pin, dentry, a->bdst,
- au_opt_udba(sb),
- AuPin_DI_LOCKED | AuPin_MNT_WRITE);
- if (unlikely(err))
- goto out_wh;
- }
- if (!err) {
- h_src_dentry = au_h_dptr(src_dentry, a->bdst);
- err = -ENOENT;
- if (h_src_dentry && d_is_positive(h_src_dentry)) {
- delegated = NULL;
- err = vfsub_link(h_src_dentry,
- au_pinned_h_dir(&a->pin),
- &a->h_path, &delegated);
- if (unlikely(err == -EWOULDBLOCK)) {
- pr_warn("cannot retry"
- " for NFSv4 delegation"
- " for an internal link\n");
- iput(delegated);
- }
- }
- }
- }
- if (unlikely(err))
- goto out_unpin;
-
- if (wh_dentry) {
- a->h_path.dentry = wh_dentry;
- err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
- dentry);
- if (unlikely(err))
- goto out_revert;
- }
-
- au_dir_ts(dir, a->bdst);
- dir->i_version++;
- inc_nlink(inode);
- inode->i_ctime = dir->i_ctime;
- d_instantiate(dentry, au_igrab(inode));
- if (d_unhashed(a->h_path.dentry))
- /* some filesystem calls d_drop() */
- d_drop(dentry);
- /* some filesystems consume an inode even hardlink */
- au_fhsm_wrote(sb, a->bdst, /*force*/0);
- goto out_unpin; /* success */
-
-out_revert:
- /* no delegation since it is just created */
- rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path,
- /*delegated*/NULL, /*force*/0);
- if (unlikely(rerr)) {
- AuIOErr("%pd reverting failed(%d, %d)\n", dentry, err, rerr);
- err = -EIO;
- }
- au_dtime_revert(&dt);
-out_unpin:
- au_unpin(&a->pin);
-out_wh:
- dput(wh_dentry);
-out_parent:
- di_write_unlock(a->parent);
- dput(a->src_parent);
-out_unlock:
- if (unlikely(err)) {
- au_update_dbstart(dentry);
- d_drop(dentry);
- }
- aufs_read_and_write_unlock2(dentry, src_dentry);
-out_kfree:
- kfree(a);
-out:
- AuTraceErr(err);
- return err;
-}
-
-int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
-{
- int err, rerr;
- aufs_bindex_t bindex;
- unsigned char diropq;
- struct path h_path;
- struct dentry *wh_dentry, *parent, *opq_dentry;
- struct mutex *h_mtx;
- struct super_block *sb;
- struct {
- struct au_pin pin;
- struct au_dtime dt;
- } *a; /* reduce the stack usage */
- struct au_wr_dir_args wr_dir_args = {
- .force_btgt = -1,
- .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
- };
-
- IMustLock(dir);
-
- err = -ENOMEM;
- a = kmalloc(sizeof(*a), GFP_NOFS);
- if (unlikely(!a))
- goto out;
-
- err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
- if (unlikely(err))
- goto out_free;
- err = au_d_may_add(dentry);
- if (unlikely(err))
- goto out_unlock;
-
- parent = dentry->d_parent; /* dir inode is locked */
- di_write_lock_parent(parent);
- wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
- &a->pin, &wr_dir_args);
- err = PTR_ERR(wh_dentry);
- if (IS_ERR(wh_dentry))
- goto out_parent;
-
- sb = dentry->d_sb;
- bindex = au_dbstart(dentry);
- h_path.dentry = au_h_dptr(dentry, bindex);
- h_path.mnt = au_sbr_mnt(sb, bindex);
- err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
- if (unlikely(err))
- goto out_unpin;
-
- /* make the dir opaque */
- diropq = 0;
- h_mtx = &d_inode(h_path.dentry)->i_mutex;
- if (wh_dentry
- || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
- mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
- opq_dentry = au_diropq_create(dentry, bindex);
- mutex_unlock(h_mtx);
- err = PTR_ERR(opq_dentry);
- if (IS_ERR(opq_dentry))
- goto out_dir;
- dput(opq_dentry);
- diropq = 1;
- }
-
- err = epilog(dir, bindex, wh_dentry, dentry);
- if (!err) {
- inc_nlink(dir);
- goto out_unpin; /* success */
- }
-
- /* revert */
- if (diropq) {
- AuLabel(revert opq);
- mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
- rerr = au_diropq_remove(dentry, bindex);
- mutex_unlock(h_mtx);
- if (rerr) {
- AuIOErr("%pd reverting diropq failed(%d, %d)\n",
- dentry, err, rerr);
- err = -EIO;
- }
- }
-
-out_dir:
- AuLabel(revert dir);
- rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path);
- if (rerr) {
- AuIOErr("%pd reverting dir failed(%d, %d)\n",
- dentry, err, rerr);
- err = -EIO;
- }
- au_dtime_revert(&a->dt);
-out_unpin:
- au_unpin(&a->pin);
- dput(wh_dentry);
-out_parent:
- di_write_unlock(parent);
-out_unlock:
- if (unlikely(err)) {
- au_update_dbstart(dentry);
- d_drop(dentry);
- }
- aufs_read_unlock(dentry, AuLock_DW);
-out_free:
- kfree(a);
-out:
- return err;
-}
diff --git a/fs/aufs/i_op_del.c b/fs/aufs/i_op_del.c
deleted file mode 100644
index 68741aadb..000000000
--- a/fs/aufs/i_op_del.c
+++ /dev/null
@@ -1,497 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * inode operations (del entry)
- */
-
-#include "aufs.h"
-
-/*
- * decide if a new whiteout for @dentry is necessary or not.
- * when it is necessary, prepare the parent dir for the upper branch whose
- * branch index is @bcpup for creation. the actual creation of the whiteout will
- * be done by caller.
- * return value:
- * 0: wh is unnecessary
- * plus: wh is necessary
- * minus: error
- */
-int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
-{
- int need_wh, err;
- aufs_bindex_t bstart;
- struct super_block *sb;
-
- sb = dentry->d_sb;
- bstart = au_dbstart(dentry);
- if (*bcpup < 0) {
- *bcpup = bstart;
- if (au_test_ro(sb, bstart, d_inode(dentry))) {
- err = AuWbrCopyup(au_sbi(sb), dentry);
- *bcpup = err;
- if (unlikely(err < 0))
- goto out;
- }
- } else
- AuDebugOn(bstart < *bcpup
- || au_test_ro(sb, *bcpup, d_inode(dentry)));
- AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart);
-
- if (*bcpup != bstart) {
- err = au_cpup_dirs(dentry, *bcpup);
- if (unlikely(err))
- goto out;
- need_wh = 1;
- } else {
- struct au_dinfo *dinfo, *tmp;
-
- need_wh = -ENOMEM;
- dinfo = au_di(dentry);
- tmp = au_di_alloc(sb, AuLsc_DI_TMP);
- if (tmp) {
- au_di_cp(tmp, dinfo);
- au_di_swap(tmp, dinfo);
- /* returns the number of positive dentries */
- need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0);
- au_di_swap(tmp, dinfo);
- au_rw_write_unlock(&tmp->di_rwsem);
- au_di_free(tmp);
- }
- }
- AuDbg("need_wh %d\n", need_wh);
- err = need_wh;
-
-out:
- return err;
-}
-
-/*
- * simple tests for the del-entry operations.
- * following the checks in vfs, plus the parent-child relationship.
- */
-int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
- struct dentry *h_parent, int isdir)
-{
- int err;
- umode_t h_mode;
- struct dentry *h_dentry, *h_latest;
- struct inode *h_inode;
-
- h_dentry = au_h_dptr(dentry, bindex);
- if (d_really_is_positive(dentry)) {
- err = -ENOENT;
- if (unlikely(d_is_negative(h_dentry)))
- goto out;
- h_inode = d_inode(h_dentry);
- if (unlikely(!h_inode->i_nlink))
- goto out;
-
- h_mode = h_inode->i_mode;
- if (!isdir) {
- err = -EISDIR;
- if (unlikely(S_ISDIR(h_mode)))
- goto out;
- } else if (unlikely(!S_ISDIR(h_mode))) {
- err = -ENOTDIR;
- goto out;
- }
- } else {
- /* rename(2) case */
- err = -EIO;
- if (unlikely(d_is_positive(h_dentry)))
- goto out;
- }
-
- err = -ENOENT;
- /* expected parent dir is locked */
- if (unlikely(h_parent != h_dentry->d_parent))
- goto out;
- err = 0;
-
- /*
- * rmdir a dir may break the consistency on some filesystem.
- * let's try heavy test.
- */
- err = -EACCES;
- if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1)
- && au_test_h_perm(d_inode(h_parent),
- MAY_EXEC | MAY_WRITE)))
- goto out;
-
- h_latest = au_sio_lkup_one(&dentry->d_name, h_parent);
- err = -EIO;
- if (IS_ERR(h_latest))
- goto out;
- if (h_latest == h_dentry)
- err = 0;
- dput(h_latest);
-
-out:
- return err;
-}
-
-/*
- * decide the branch where we operate for @dentry. the branch index will be set
- * @rbcpup. after diciding it, 'pin' it and store the timestamps of the parent
- * dir for reverting.
- * when a new whiteout is necessary, create it.
- */
-static struct dentry*
-lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
- struct au_dtime *dt, struct au_pin *pin)
-{
- struct dentry *wh_dentry;
- struct super_block *sb;
- struct path h_path;
- int err, need_wh;
- unsigned int udba;
- aufs_bindex_t bcpup;
-
- need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
- wh_dentry = ERR_PTR(need_wh);
- if (unlikely(need_wh < 0))
- goto out;
-
- sb = dentry->d_sb;
- udba = au_opt_udba(sb);
- bcpup = *rbcpup;
- err = au_pin(pin, dentry, bcpup, udba,
- AuPin_DI_LOCKED | AuPin_MNT_WRITE);
- wh_dentry = ERR_PTR(err);
- if (unlikely(err))
- goto out;
-
- h_path.dentry = au_pinned_h_parent(pin);
- if (udba != AuOpt_UDBA_NONE
- && au_dbstart(dentry) == bcpup) {
- err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
- wh_dentry = ERR_PTR(err);
- if (unlikely(err))
- goto out_unpin;
- }
-
- h_path.mnt = au_sbr_mnt(sb, bcpup);
- au_dtime_store(dt, au_pinned_parent(pin), &h_path);
- wh_dentry = NULL;
- if (!need_wh)
- goto out; /* success, no need to create whiteout */
-
- wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
- if (IS_ERR(wh_dentry))
- goto out_unpin;
-
- /* returns with the parent is locked and wh_dentry is dget-ed */
- goto out; /* success */
-
-out_unpin:
- au_unpin(pin);
-out:
- return wh_dentry;
-}
-
-/*
- * when removing a dir, rename it to a unique temporary whiteout-ed name first
- * in order to be revertible and save time for removing many child whiteouts
- * under the dir.
- * returns 1 when there are too many child whiteout and caller should remove
- * them asynchronously. returns 0 when the number of children is enough small to
- * remove now or the branch fs is a remote fs.
- * otherwise return an error.
- */
-static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
- struct au_nhash *whlist, struct inode *dir)
-{
- int rmdir_later, err, dirwh;
- struct dentry *h_dentry;
- struct super_block *sb;
- struct inode *inode;
-
- sb = dentry->d_sb;
- SiMustAnyLock(sb);
- h_dentry = au_h_dptr(dentry, bindex);
- err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
- if (unlikely(err))
- goto out;
-
- /* stop monitoring */
- inode = d_inode(dentry);
- au_hn_free(au_hi(inode, bindex));
-
- if (!au_test_fs_remote(h_dentry->d_sb)) {
- dirwh = au_sbi(sb)->si_dirwh;
- rmdir_later = (dirwh <= 1);
- if (!rmdir_later)
- rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
- dirwh);
- if (rmdir_later)
- return rmdir_later;
- }
-
- err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
- if (unlikely(err)) {
- AuIOErr("rmdir %pd, b%d failed, %d. ignored\n",
- h_dentry, bindex, err);
- err = 0;
- }
-
-out:
- AuTraceErr(err);
- return err;
-}
-
-/*
- * final procedure for deleting a entry.
- * maintain dentry and iattr.
- */
-static void epilog(struct inode *dir, struct dentry *dentry,
- aufs_bindex_t bindex)
-{
- struct inode *inode;
-
- inode = d_inode(dentry);
- d_drop(dentry);
- inode->i_ctime = dir->i_ctime;
-
- au_dir_ts(dir, bindex);
- dir->i_version++;
-}
-
-/*
- * when an error happened, remove the created whiteout and revert everything.
- */
-static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex,
- aufs_bindex_t bwh, struct dentry *wh_dentry,
- struct dentry *dentry, struct au_dtime *dt)
-{
- int rerr;
- struct path h_path = {
- .dentry = wh_dentry,
- .mnt = au_sbr_mnt(dir->i_sb, bindex)
- };
-
- rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry);
- if (!rerr) {
- au_set_dbwh(dentry, bwh);
- au_dtime_revert(dt);
- return 0;
- }
-
- AuIOErr("%pd reverting whiteout failed(%d, %d)\n", dentry, err, rerr);
- return -EIO;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int aufs_unlink(struct inode *dir, struct dentry *dentry)
-{
- int err;
- aufs_bindex_t bwh, bindex, bstart;
- struct inode *inode, *h_dir, *delegated;
- struct dentry *parent, *wh_dentry;
- /* to reuduce stack size */
- struct {
- struct au_dtime dt;
- struct au_pin pin;
- struct path h_path;
- } *a;
-
- IMustLock(dir);
-
- err = -ENOMEM;
- a = kmalloc(sizeof(*a), GFP_NOFS);
- if (unlikely(!a))
- goto out;
-
- err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
- if (unlikely(err))
- goto out_free;
- err = au_d_hashed_positive(dentry);
- if (unlikely(err))
- goto out_unlock;
- inode = d_inode(dentry);
- IMustLock(inode);
- err = -EISDIR;
- if (unlikely(d_is_dir(dentry)))
- goto out_unlock; /* possible? */
-
- bstart = au_dbstart(dentry);
- bwh = au_dbwh(dentry);
- bindex = -1;
- parent = dentry->d_parent; /* dir inode is locked */
- di_write_lock_parent(parent);
- wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &a->dt,
- &a->pin);
- err = PTR_ERR(wh_dentry);
- if (IS_ERR(wh_dentry))
- goto out_parent;
-
- a->h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
- a->h_path.dentry = au_h_dptr(dentry, bstart);
- dget(a->h_path.dentry);
- if (bindex == bstart) {
- h_dir = au_pinned_h_dir(&a->pin);
- delegated = NULL;
- err = vfsub_unlink(h_dir, &a->h_path, &delegated, /*force*/0);
- if (unlikely(err == -EWOULDBLOCK)) {
- pr_warn("cannot retry for NFSv4 delegation"
- " for an internal unlink\n");
- iput(delegated);
- }
- } else {
- /* dir inode is locked */
- h_dir = d_inode(wh_dentry->d_parent);
- IMustLock(h_dir);
- err = 0;
- }
-
- if (!err) {
- vfsub_drop_nlink(inode);
- epilog(dir, dentry, bindex);
-
- /* update target timestamps */
- if (bindex == bstart) {
- vfsub_update_h_iattr(&a->h_path, /*did*/NULL);
- /*ignore*/
- inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
- } else
- /* todo: this timestamp may be reverted later */
- inode->i_ctime = h_dir->i_ctime;
- goto out_unpin; /* success */
- }
-
- /* revert */
- if (wh_dentry) {
- int rerr;
-
- rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
- &a->dt);
- if (rerr)
- err = rerr;
- }
-
-out_unpin:
- au_unpin(&a->pin);
- dput(wh_dentry);
- dput(a->h_path.dentry);
-out_parent:
- di_write_unlock(parent);
-out_unlock:
- aufs_read_unlock(dentry, AuLock_DW);
-out_free:
- kfree(a);
-out:
- return err;
-}
-
-int aufs_rmdir(struct inode *dir, struct dentry *dentry)
-{
- int err, rmdir_later;
- aufs_bindex_t bwh, bindex, bstart;
- struct inode *inode;
- struct dentry *parent, *wh_dentry, *h_dentry;
- struct au_whtmp_rmdir *args;
- /* to reuduce stack size */
- struct {
- struct au_dtime dt;
- struct au_pin pin;
- } *a;
-
- IMustLock(dir);
-
- err = -ENOMEM;
- a = kmalloc(sizeof(*a), GFP_NOFS);
- if (unlikely(!a))
- goto out;
-
- err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
- if (unlikely(err))
- goto out_free;
- err = au_alive_dir(dentry);
- if (unlikely(err))
- goto out_unlock;
- inode = d_inode(dentry);
- IMustLock(inode);
- err = -ENOTDIR;
- if (unlikely(!d_is_dir(dentry)))
- goto out_unlock; /* possible? */
-
- err = -ENOMEM;
- args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
- if (unlikely(!args))
- goto out_unlock;
-
- parent = dentry->d_parent; /* dir inode is locked */
- di_write_lock_parent(parent);
- err = au_test_empty(dentry, &args->whlist);
- if (unlikely(err))
- goto out_parent;
-
- bstart = au_dbstart(dentry);
- bwh = au_dbwh(dentry);
- bindex = -1;
- wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &a->dt,
- &a->pin);
- err = PTR_ERR(wh_dentry);
- if (IS_ERR(wh_dentry))
- goto out_parent;
-
- h_dentry = au_h_dptr(dentry, bstart);
- dget(h_dentry);
- rmdir_later = 0;
- if (bindex == bstart) {
- err = renwh_and_rmdir(dentry, bstart, &args->whlist, dir);
- if (err > 0) {
- rmdir_later = err;
- err = 0;
- }
- } else {
- /* stop monitoring */
- au_hn_free(au_hi(inode, bstart));
-
- /* dir inode is locked */
- IMustLock(d_inode(wh_dentry->d_parent));
- err = 0;
- }
-
- if (!err) {
- vfsub_dead_dir(inode);
- au_set_dbdiropq(dentry, -1);
- epilog(dir, dentry, bindex);
-
- if (rmdir_later) {
- au_whtmp_kick_rmdir(dir, bstart, h_dentry, args);
- args = NULL;
- }
-
- goto out_unpin; /* success */
- }
-
- /* revert */
- AuLabel(revert);
- if (wh_dentry) {
- int rerr;
-
- rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
- &a->dt);
- if (rerr)
- err = rerr;
- }
-
-out_unpin:
- au_unpin(&a->pin);
- dput(wh_dentry);
- dput(h_dentry);
-out_parent:
- di_write_unlock(parent);
- if (args)
- au_whtmp_rmdir_free(args);
-out_unlock:
- aufs_read_unlock(dentry, AuLock_DW);
-out_free:
- kfree(a);
-out:
- AuTraceErr(err);
- return err;
-}
diff --git a/fs/aufs/i_op_ren.c b/fs/aufs/i_op_ren.c
deleted file mode 100644
index c880144b5..000000000
--- a/fs/aufs/i_op_ren.c
+++ /dev/null
@@ -1,1002 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * inode operation (rename entry)
- * todo: this is crazy monster
- */
-
-#include "aufs.h"
-
-enum { AuSRC, AuDST, AuSrcDst };
-enum { AuPARENT, AuCHILD, AuParentChild };
-
-#define AuRen_ISDIR 1
-#define AuRen_ISSAMEDIR (1 << 1)
-#define AuRen_WHSRC (1 << 2)
-#define AuRen_WHDST (1 << 3)
-#define AuRen_MNT_WRITE (1 << 4)
-#define AuRen_DT_DSTDIR (1 << 5)
-#define AuRen_DIROPQ (1 << 6)
-#define au_ftest_ren(flags, name) ((flags) & AuRen_##name)
-#define au_fset_ren(flags, name) \
- do { (flags) |= AuRen_##name; } while (0)
-#define au_fclr_ren(flags, name) \
- do { (flags) &= ~AuRen_##name; } while (0)
-
-struct au_ren_args {
- struct {
- struct dentry *dentry, *h_dentry, *parent, *h_parent,
- *wh_dentry;
- struct inode *dir, *inode;
- struct au_hinode *hdir;
- struct au_dtime dt[AuParentChild];
- aufs_bindex_t bstart;
- } sd[AuSrcDst];
-
-#define src_dentry sd[AuSRC].dentry
-#define src_dir sd[AuSRC].dir
-#define src_inode sd[AuSRC].inode
-#define src_h_dentry sd[AuSRC].h_dentry
-#define src_parent sd[AuSRC].parent
-#define src_h_parent sd[AuSRC].h_parent
-#define src_wh_dentry sd[AuSRC].wh_dentry
-#define src_hdir sd[AuSRC].hdir
-#define src_h_dir sd[AuSRC].hdir->hi_inode
-#define src_dt sd[AuSRC].dt
-#define src_bstart sd[AuSRC].bstart
-
-#define dst_dentry sd[AuDST].dentry
-#define dst_dir sd[AuDST].dir
-#define dst_inode sd[AuDST].inode
-#define dst_h_dentry sd[AuDST].h_dentry
-#define dst_parent sd[AuDST].parent
-#define dst_h_parent sd[AuDST].h_parent
-#define dst_wh_dentry sd[AuDST].wh_dentry
-#define dst_hdir sd[AuDST].hdir
-#define dst_h_dir sd[AuDST].hdir->hi_inode
-#define dst_dt sd[AuDST].dt
-#define dst_bstart sd[AuDST].bstart
-
- struct dentry *h_trap;
- struct au_branch *br;
- struct au_hinode *src_hinode;
- struct path h_path;
- struct au_nhash whlist;
- aufs_bindex_t btgt, src_bwh, src_bdiropq;
-
- unsigned int flags;
-
- struct au_whtmp_rmdir *thargs;
- struct dentry *h_dst;
-};
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * functions for reverting.
- * when an error happened in a single rename systemcall, we should revert
- * everything as if nothing happened.
- * we don't need to revert the copied-up/down the parent dir since they are
- * harmless.
- */
-
-#define RevertFailure(fmt, ...) do { \
- AuIOErr("revert failure: " fmt " (%d, %d)\n", \
- ##__VA_ARGS__, err, rerr); \
- err = -EIO; \
-} while (0)
-
-static void au_ren_rev_diropq(int err, struct au_ren_args *a)
-{
- int rerr;
-
- au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
- rerr = au_diropq_remove(a->src_dentry, a->btgt);
- au_hn_imtx_unlock(a->src_hinode);
- au_set_dbdiropq(a->src_dentry, a->src_bdiropq);
- if (rerr)
- RevertFailure("remove diropq %pd", a->src_dentry);
-}
-
-static void au_ren_rev_rename(int err, struct au_ren_args *a)
-{
- int rerr;
- struct inode *delegated;
-
- a->h_path.dentry = vfsub_lkup_one(&a->src_dentry->d_name,
- a->src_h_parent);
- rerr = PTR_ERR(a->h_path.dentry);
- if (IS_ERR(a->h_path.dentry)) {
- RevertFailure("lkup one %pd", a->src_dentry);
- return;
- }
-
- delegated = NULL;
- rerr = vfsub_rename(a->dst_h_dir,
- au_h_dptr(a->src_dentry, a->btgt),
- a->src_h_dir, &a->h_path, &delegated);
- if (unlikely(rerr == -EWOULDBLOCK)) {
- pr_warn("cannot retry for NFSv4 delegation"
- " for an internal rename\n");
- iput(delegated);
- }
- d_drop(a->h_path.dentry);
- dput(a->h_path.dentry);
- /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
- if (rerr)
- RevertFailure("rename %pd", a->src_dentry);
-}
-
-static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
-{
- int rerr;
- struct inode *delegated;
-
- a->h_path.dentry = vfsub_lkup_one(&a->dst_dentry->d_name,
- a->dst_h_parent);
- rerr = PTR_ERR(a->h_path.dentry);
- if (IS_ERR(a->h_path.dentry)) {
- RevertFailure("lkup one %pd", a->dst_dentry);
- return;
- }
- if (d_is_positive(a->h_path.dentry)) {
- d_drop(a->h_path.dentry);
- dput(a->h_path.dentry);
- return;
- }
-
- delegated = NULL;
- rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path,
- &delegated);
- if (unlikely(rerr == -EWOULDBLOCK)) {
- pr_warn("cannot retry for NFSv4 delegation"
- " for an internal rename\n");
- iput(delegated);
- }
- d_drop(a->h_path.dentry);
- dput(a->h_path.dentry);
- if (!rerr)
- au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst));
- else
- RevertFailure("rename %pd", a->h_dst);
-}
-
-static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
-{
- int rerr;
-
- a->h_path.dentry = a->src_wh_dentry;
- rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
- au_set_dbwh(a->src_dentry, a->src_bwh);
- if (rerr)
- RevertFailure("unlink %pd", a->src_wh_dentry);
-}
-#undef RevertFailure
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * when we have to copyup the renaming entry, do it with the rename-target name
- * in order to minimize the cost (the later actual rename is unnecessary).
- * otherwise rename it on the target branch.
- */
-static int au_ren_or_cpup(struct au_ren_args *a)
-{
- int err;
- struct dentry *d;
- struct inode *delegated;
-
- d = a->src_dentry;
- if (au_dbstart(d) == a->btgt) {
- a->h_path.dentry = a->dst_h_dentry;
- if (au_ftest_ren(a->flags, DIROPQ)
- && au_dbdiropq(d) == a->btgt)
- au_fclr_ren(a->flags, DIROPQ);
- AuDebugOn(au_dbstart(d) != a->btgt);
- delegated = NULL;
- err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
- a->dst_h_dir, &a->h_path, &delegated);
- if (unlikely(err == -EWOULDBLOCK)) {
- pr_warn("cannot retry for NFSv4 delegation"
- " for an internal rename\n");
- iput(delegated);
- }
- } else
- BUG();
-
- if (!err && a->h_dst)
- /* it will be set to dinfo later */
- dget(a->h_dst);
-
- return err;
-}
-
-/* cf. aufs_rmdir() */
-static int au_ren_del_whtmp(struct au_ren_args *a)
-{
- int err;
- struct inode *dir;
-
- dir = a->dst_dir;
- SiMustAnyLock(dir->i_sb);
- if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
- au_sbi(dir->i_sb)->si_dirwh)
- || au_test_fs_remote(a->h_dst->d_sb)) {
- err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
- if (unlikely(err))
- pr_warn("failed removing whtmp dir %pd (%d), "
- "ignored.\n", a->h_dst, err);
- } else {
- au_nhash_wh_free(&a->thargs->whlist);
- a->thargs->whlist = a->whlist;
- a->whlist.nh_num = 0;
- au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs);
- dput(a->h_dst);
- a->thargs = NULL;
- }
-
- return 0;
-}
-
-/* make it 'opaque' dir. */
-static int au_ren_diropq(struct au_ren_args *a)
-{
- int err;
- struct dentry *diropq;
-
- err = 0;
- a->src_bdiropq = au_dbdiropq(a->src_dentry);
- a->src_hinode = au_hi(a->src_inode, a->btgt);
- au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
- diropq = au_diropq_create(a->src_dentry, a->btgt);
- au_hn_imtx_unlock(a->src_hinode);
- if (IS_ERR(diropq))
- err = PTR_ERR(diropq);
- else
- dput(diropq);
-
- return err;
-}
-
-static int do_rename(struct au_ren_args *a)
-{
- int err;
- struct dentry *d, *h_d;
-
- /* prepare workqueue args for asynchronous rmdir */
- h_d = a->dst_h_dentry;
- if (au_ftest_ren(a->flags, ISDIR) && d_is_positive(h_d)) {
- err = -ENOMEM;
- a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS);
- if (unlikely(!a->thargs))
- goto out;
- a->h_dst = dget(h_d);
- }
-
- /* create whiteout for src_dentry */
- if (au_ftest_ren(a->flags, WHSRC)) {
- a->src_bwh = au_dbwh(a->src_dentry);
- AuDebugOn(a->src_bwh >= 0);
- a->src_wh_dentry
- = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent);
- err = PTR_ERR(a->src_wh_dentry);
- if (IS_ERR(a->src_wh_dentry))
- goto out_thargs;
- }
-
- /* lookup whiteout for dentry */
- if (au_ftest_ren(a->flags, WHDST)) {
- h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name,
- a->br);
- err = PTR_ERR(h_d);
- if (IS_ERR(h_d))
- goto out_whsrc;
- if (d_is_negative(h_d))
- dput(h_d);
- else
- a->dst_wh_dentry = h_d;
- }
-
- /* rename dentry to tmpwh */
- if (a->thargs) {
- err = au_whtmp_ren(a->dst_h_dentry, a->br);
- if (unlikely(err))
- goto out_whdst;
-
- d = a->dst_dentry;
- au_set_h_dptr(d, a->btgt, NULL);
- err = au_lkup_neg(d, a->btgt, /*wh*/0);
- if (unlikely(err))
- goto out_whtmp;
- a->dst_h_dentry = au_h_dptr(d, a->btgt);
- }
-
- BUG_ON(d_is_positive(a->dst_h_dentry) && a->src_bstart != a->btgt);
-
- /* rename by vfs_rename or cpup */
- d = a->dst_dentry;
- if (au_ftest_ren(a->flags, ISDIR)
- && (a->dst_wh_dentry
- || au_dbdiropq(d) == a->btgt
- /* hide the lower to keep xino */
- || a->btgt < au_dbend(d)
- || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ)))
- au_fset_ren(a->flags, DIROPQ);
- err = au_ren_or_cpup(a);
- if (unlikely(err))
- /* leave the copied-up one */
- goto out_whtmp;
-
- /* make dir opaque */
- if (au_ftest_ren(a->flags, DIROPQ)) {
- err = au_ren_diropq(a);
- if (unlikely(err))
- goto out_rename;
- }
-
- /* update target timestamps */
- AuDebugOn(au_dbstart(a->src_dentry) != a->btgt);
- a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
- vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
- a->src_inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
-
- /* remove whiteout for dentry */
- if (a->dst_wh_dentry) {
- a->h_path.dentry = a->dst_wh_dentry;
- err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
- a->dst_dentry);
- if (unlikely(err))
- goto out_diropq;
- }
-
- /* remove whtmp */
- if (a->thargs)
- au_ren_del_whtmp(a); /* ignore this error */
-
- au_fhsm_wrote(a->src_dentry->d_sb, a->btgt, /*force*/0);
- err = 0;
- goto out_success;
-
-out_diropq:
- if (au_ftest_ren(a->flags, DIROPQ))
- au_ren_rev_diropq(err, a);
-out_rename:
- au_ren_rev_rename(err, a);
- dput(a->h_dst);
-out_whtmp:
- if (a->thargs)
- au_ren_rev_whtmp(err, a);
-out_whdst:
- dput(a->dst_wh_dentry);
- a->dst_wh_dentry = NULL;
-out_whsrc:
- if (a->src_wh_dentry)
- au_ren_rev_whsrc(err, a);
-out_success:
- dput(a->src_wh_dentry);
- dput(a->dst_wh_dentry);
-out_thargs:
- if (a->thargs) {
- dput(a->h_dst);
- au_whtmp_rmdir_free(a->thargs);
- a->thargs = NULL;
- }
-out:
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * test if @dentry dir can be rename destination or not.
- * success means, it is a logically empty dir.
- */
-static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
-{
- return au_test_empty(dentry, whlist);
-}
-
-/*
- * test if @dentry dir can be rename source or not.
- * if it can, return 0 and @children is filled.
- * success means,
- * - it is a logically empty dir.
- * - or, it exists on writable branch and has no children including whiteouts
- * on the lower branch.
- */
-static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt)
-{
- int err;
- unsigned int rdhash;
- aufs_bindex_t bstart;
-
- bstart = au_dbstart(dentry);
- if (bstart != btgt) {
- struct au_nhash whlist;
-
- SiMustAnyLock(dentry->d_sb);
- rdhash = au_sbi(dentry->d_sb)->si_rdhash;
- if (!rdhash)
- rdhash = au_rdhash_est(au_dir_size(/*file*/NULL,
- dentry));
- err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
- if (unlikely(err))
- goto out;
- err = au_test_empty(dentry, &whlist);
- au_nhash_wh_free(&whlist);
- goto out;
- }
-
- if (bstart == au_dbtaildir(dentry))
- return 0; /* success */
-
- err = au_test_empty_lower(dentry);
-
-out:
- if (err == -ENOTEMPTY) {
- AuWarn1("renaming dir who has child(ren) on multiple branches,"
- " is not supported\n");
- err = -EXDEV;
- }
- return err;
-}
-
-/* side effect: sets whlist and h_dentry */
-static int au_ren_may_dir(struct au_ren_args *a)
-{
- int err;
- unsigned int rdhash;
- struct dentry *d;
-
- d = a->dst_dentry;
- SiMustAnyLock(d->d_sb);
-
- err = 0;
- if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) {
- rdhash = au_sbi(d->d_sb)->si_rdhash;
- if (!rdhash)
- rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d));
- err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS);
- if (unlikely(err))
- goto out;
-
- au_set_dbstart(d, a->dst_bstart);
- err = may_rename_dstdir(d, &a->whlist);
- au_set_dbstart(d, a->btgt);
- }
- a->dst_h_dentry = au_h_dptr(d, au_dbstart(d));
- if (unlikely(err))
- goto out;
-
- d = a->src_dentry;
- a->src_h_dentry = au_h_dptr(d, au_dbstart(d));
- if (au_ftest_ren(a->flags, ISDIR)) {
- err = may_rename_srcdir(d, a->btgt);
- if (unlikely(err)) {
- au_nhash_wh_free(&a->whlist);
- a->whlist.nh_num = 0;
- }
- }
-out:
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * simple tests for rename.
- * following the checks in vfs, plus the parent-child relationship.
- */
-static int au_may_ren(struct au_ren_args *a)
-{
- int err, isdir;
- struct inode *h_inode;
-
- if (a->src_bstart == a->btgt) {
- err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
- au_ftest_ren(a->flags, ISDIR));
- if (unlikely(err))
- goto out;
- err = -EINVAL;
- if (unlikely(a->src_h_dentry == a->h_trap))
- goto out;
- }
-
- err = 0;
- if (a->dst_bstart != a->btgt)
- goto out;
-
- err = -ENOTEMPTY;
- if (unlikely(a->dst_h_dentry == a->h_trap))
- goto out;
-
- err = -EIO;
- isdir = !!au_ftest_ren(a->flags, ISDIR);
- if (d_really_is_negative(a->dst_dentry)) {
- if (d_is_negative(a->dst_h_dentry))
- err = au_may_add(a->dst_dentry, a->btgt,
- a->dst_h_parent, isdir);
- } else {
- if (unlikely(d_is_negative(a->dst_h_dentry)))
- goto out;
- h_inode = d_inode(a->dst_h_dentry);
- if (h_inode->i_nlink)
- err = au_may_del(a->dst_dentry, a->btgt,
- a->dst_h_parent, isdir);
- }
-
-out:
- if (unlikely(err == -ENOENT || err == -EEXIST))
- err = -EIO;
- AuTraceErr(err);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * locking order
- * (VFS)
- * - src_dir and dir by lock_rename()
- * - inode if exitsts
- * (aufs)
- * - lock all
- * + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
- * + si_read_lock
- * + di_write_lock2_child()
- * + di_write_lock_child()
- * + ii_write_lock_child()
- * + di_write_lock_child2()
- * + ii_write_lock_child2()
- * + src_parent and parent
- * + di_write_lock_parent()
- * + ii_write_lock_parent()
- * + di_write_lock_parent2()
- * + ii_write_lock_parent2()
- * + lower src_dir and dir by vfsub_lock_rename()
- * + verify the every relationships between child and parent. if any
- * of them failed, unlock all and return -EBUSY.
- */
-static void au_ren_unlock(struct au_ren_args *a)
-{
- vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
- a->dst_h_parent, a->dst_hdir);
- if (au_ftest_ren(a->flags, MNT_WRITE))
- vfsub_mnt_drop_write(au_br_mnt(a->br));
-}
-
-static int au_ren_lock(struct au_ren_args *a)
-{
- int err;
- unsigned int udba;
-
- err = 0;
- a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
- a->src_hdir = au_hi(a->src_dir, a->btgt);
- a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
- a->dst_hdir = au_hi(a->dst_dir, a->btgt);
-
- err = vfsub_mnt_want_write(au_br_mnt(a->br));
- if (unlikely(err))
- goto out;
- au_fset_ren(a->flags, MNT_WRITE);
- a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
- a->dst_h_parent, a->dst_hdir);
- udba = au_opt_udba(a->src_dentry->d_sb);
- if (unlikely(a->src_hdir->hi_inode != d_inode(a->src_h_parent)
- || a->dst_hdir->hi_inode != d_inode(a->dst_h_parent)))
- err = au_busy_or_stale();
- if (!err && au_dbstart(a->src_dentry) == a->btgt)
- err = au_h_verify(a->src_h_dentry, udba,
- d_inode(a->src_h_parent), a->src_h_parent,
- a->br);
- if (!err && au_dbstart(a->dst_dentry) == a->btgt)
- err = au_h_verify(a->dst_h_dentry, udba,
- d_inode(a->dst_h_parent), a->dst_h_parent,
- a->br);
- if (!err)
- goto out; /* success */
-
- err = au_busy_or_stale();
- au_ren_unlock(a);
-
-out:
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static void au_ren_refresh_dir(struct au_ren_args *a)
-{
- struct inode *dir;
-
- dir = a->dst_dir;
- dir->i_version++;
- if (au_ftest_ren(a->flags, ISDIR)) {
- /* is this updating defined in POSIX? */
- au_cpup_attr_timesizes(a->src_inode);
- au_cpup_attr_nlink(dir, /*force*/1);
- }
-
- au_dir_ts(dir, a->btgt);
-
- if (au_ftest_ren(a->flags, ISSAMEDIR))
- return;
-
- dir = a->src_dir;
- dir->i_version++;
- if (au_ftest_ren(a->flags, ISDIR))
- au_cpup_attr_nlink(dir, /*force*/1);
- au_dir_ts(dir, a->btgt);
-}
-
-static void au_ren_refresh(struct au_ren_args *a)
-{
- aufs_bindex_t bend, bindex;
- struct dentry *d, *h_d;
- struct inode *i, *h_i;
- struct super_block *sb;
-
- d = a->dst_dentry;
- d_drop(d);
- if (a->h_dst)
- /* already dget-ed by au_ren_or_cpup() */
- au_set_h_dptr(d, a->btgt, a->h_dst);
-
- i = a->dst_inode;
- if (i) {
- if (!au_ftest_ren(a->flags, ISDIR))
- vfsub_drop_nlink(i);
- else {
- vfsub_dead_dir(i);
- au_cpup_attr_timesizes(i);
- }
- au_update_dbrange(d, /*do_put_zero*/1);
- } else {
- bend = a->btgt;
- for (bindex = au_dbstart(d); bindex < bend; bindex++)
- au_set_h_dptr(d, bindex, NULL);
- bend = au_dbend(d);
- for (bindex = a->btgt + 1; bindex <= bend; bindex++)
- au_set_h_dptr(d, bindex, NULL);
- au_update_dbrange(d, /*do_put_zero*/0);
- }
-
- d = a->src_dentry;
- au_set_dbwh(d, -1);
- bend = au_dbend(d);
- for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
- h_d = au_h_dptr(d, bindex);
- if (h_d)
- au_set_h_dptr(d, bindex, NULL);
- }
- au_set_dbend(d, a->btgt);
-
- sb = d->d_sb;
- i = a->src_inode;
- if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
- return; /* success */
-
- bend = au_ibend(i);
- for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
- h_i = au_h_iptr(i, bindex);
- if (h_i) {
- au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
- /* ignore this error */
- au_set_h_iptr(i, bindex, NULL, 0);
- }
- }
- au_set_ibend(i, a->btgt);
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* mainly for link(2) and rename(2) */
-int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
-{
- aufs_bindex_t bdiropq, bwh;
- struct dentry *parent;
- struct au_branch *br;
-
- parent = dentry->d_parent;
- IMustLock(d_inode(parent)); /* dir is locked */
-
- bdiropq = au_dbdiropq(parent);
- bwh = au_dbwh(dentry);
- br = au_sbr(dentry->d_sb, btgt);
- if (au_br_rdonly(br)
- || (0 <= bdiropq && bdiropq < btgt)
- || (0 <= bwh && bwh < btgt))
- btgt = -1;
-
- AuDbg("btgt %d\n", btgt);
- return btgt;
-}
-
-/* sets src_bstart, dst_bstart and btgt */
-static int au_ren_wbr(struct au_ren_args *a)
-{
- int err;
- struct au_wr_dir_args wr_dir_args = {
- /* .force_btgt = -1, */
- .flags = AuWrDir_ADD_ENTRY
- };
-
- a->src_bstart = au_dbstart(a->src_dentry);
- a->dst_bstart = au_dbstart(a->dst_dentry);
- if (au_ftest_ren(a->flags, ISDIR))
- au_fset_wrdir(wr_dir_args.flags, ISDIR);
- wr_dir_args.force_btgt = a->src_bstart;
- if (a->dst_inode && a->dst_bstart < a->src_bstart)
- wr_dir_args.force_btgt = a->dst_bstart;
- wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
- err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
- a->btgt = err;
-
- return err;
-}
-
-static void au_ren_dt(struct au_ren_args *a)
-{
- a->h_path.dentry = a->src_h_parent;
- au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
- if (!au_ftest_ren(a->flags, ISSAMEDIR)) {
- a->h_path.dentry = a->dst_h_parent;
- au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
- }
-
- au_fclr_ren(a->flags, DT_DSTDIR);
- if (!au_ftest_ren(a->flags, ISDIR))
- return;
-
- a->h_path.dentry = a->src_h_dentry;
- au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
- if (d_is_positive(a->dst_h_dentry)) {
- au_fset_ren(a->flags, DT_DSTDIR);
- a->h_path.dentry = a->dst_h_dentry;
- au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
- }
-}
-
-static void au_ren_rev_dt(int err, struct au_ren_args *a)
-{
- struct dentry *h_d;
- struct mutex *h_mtx;
-
- au_dtime_revert(a->src_dt + AuPARENT);
- if (!au_ftest_ren(a->flags, ISSAMEDIR))
- au_dtime_revert(a->dst_dt + AuPARENT);
-
- if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) {
- h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
- h_mtx = &d_inode(h_d)->i_mutex;
- mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
- au_dtime_revert(a->src_dt + AuCHILD);
- mutex_unlock(h_mtx);
-
- if (au_ftest_ren(a->flags, DT_DSTDIR)) {
- h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
- h_mtx = &d_inode(h_d)->i_mutex;
- mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
- au_dtime_revert(a->dst_dt + AuCHILD);
- mutex_unlock(h_mtx);
- }
- }
-}
-
-/* ---------------------------------------------------------------------- */
-
-int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
- struct inode *_dst_dir, struct dentry *_dst_dentry)
-{
- int err, flags;
- /* reduce stack space */
- struct au_ren_args *a;
-
- AuDbg("%pd, %pd\n", _src_dentry, _dst_dentry);
- IMustLock(_src_dir);
- IMustLock(_dst_dir);
-
- err = -ENOMEM;
- BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
- a = kzalloc(sizeof(*a), GFP_NOFS);
- if (unlikely(!a))
- goto out;
-
- a->src_dir = _src_dir;
- a->src_dentry = _src_dentry;
- a->src_inode = NULL;
- if (d_really_is_positive(a->src_dentry))
- a->src_inode = d_inode(a->src_dentry);
- a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
- a->dst_dir = _dst_dir;
- a->dst_dentry = _dst_dentry;
- a->dst_inode = NULL;
- if (d_really_is_positive(a->dst_dentry))
- a->dst_inode = d_inode(a->dst_dentry);
- a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
- if (a->dst_inode) {
- IMustLock(a->dst_inode);
- au_igrab(a->dst_inode);
- }
-
- err = -ENOTDIR;
- flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN;
- if (d_is_dir(a->src_dentry)) {
- au_fset_ren(a->flags, ISDIR);
- if (unlikely(d_really_is_positive(a->dst_dentry)
- && !d_is_dir(a->dst_dentry)))
- goto out_free;
- flags |= AuLock_DIRS;
- }
- err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, flags);
- if (unlikely(err))
- goto out_free;
-
- err = au_d_hashed_positive(a->src_dentry);
- if (unlikely(err))
- goto out_unlock;
- err = -ENOENT;
- if (a->dst_inode) {
- /*
- * If it is a dir, VFS unhash dst_dentry before this
- * function. It means we cannot rely upon d_unhashed().
- */
- if (unlikely(!a->dst_inode->i_nlink))
- goto out_unlock;
- if (!S_ISDIR(a->dst_inode->i_mode)) {
- err = au_d_hashed_positive(a->dst_dentry);
- if (unlikely(err))
- goto out_unlock;
- } else if (unlikely(IS_DEADDIR(a->dst_inode)))
- goto out_unlock;
- } else if (unlikely(d_unhashed(a->dst_dentry)))
- goto out_unlock;
-
- /*
- * is it possible?
- * yes, it happened (in linux-3.3-rcN) but I don't know why.
- * there may exist a problem somewhere else.
- */
- err = -EINVAL;
- if (unlikely(d_inode(a->dst_parent) == d_inode(a->src_dentry)))
- goto out_unlock;
-
- au_fset_ren(a->flags, ISSAMEDIR); /* temporary */
- di_write_lock_parent(a->dst_parent);
-
- /* which branch we process */
- err = au_ren_wbr(a);
- if (unlikely(err < 0))
- goto out_parent;
- a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
- a->h_path.mnt = au_br_mnt(a->br);
-
- /* are they available to be renamed */
- err = au_ren_may_dir(a);
- if (unlikely(err))
- goto out_children;
-
- /* prepare the writable parent dir on the same branch */
- if (a->dst_bstart == a->btgt) {
- au_fset_ren(a->flags, WHDST);
- } else {
- err = au_cpup_dirs(a->dst_dentry, a->btgt);
- if (unlikely(err))
- goto out_children;
- }
-
- if (a->src_dir != a->dst_dir) {
- /*
- * this temporary unlock is safe,
- * because both dir->i_mutex are locked.
- */
- di_write_unlock(a->dst_parent);
- di_write_lock_parent(a->src_parent);
- err = au_wr_dir_need_wh(a->src_dentry,
- au_ftest_ren(a->flags, ISDIR),
- &a->btgt);
- di_write_unlock(a->src_parent);
- di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1);
- au_fclr_ren(a->flags, ISSAMEDIR);
- } else
- err = au_wr_dir_need_wh(a->src_dentry,
- au_ftest_ren(a->flags, ISDIR),
- &a->btgt);
- if (unlikely(err < 0))
- goto out_children;
- if (err)
- au_fset_ren(a->flags, WHSRC);
-
- /* cpup src */
- if (a->src_bstart != a->btgt) {
- struct au_pin pin;
-
- err = au_pin(&pin, a->src_dentry, a->btgt,
- au_opt_udba(a->src_dentry->d_sb),
- AuPin_DI_LOCKED | AuPin_MNT_WRITE);
- if (!err) {
- struct au_cp_generic cpg = {
- .dentry = a->src_dentry,
- .bdst = a->btgt,
- .bsrc = a->src_bstart,
- .len = -1,
- .pin = &pin,
- .flags = AuCpup_DTIME | AuCpup_HOPEN
- };
- AuDebugOn(au_dbstart(a->src_dentry) != a->src_bstart);
- err = au_sio_cpup_simple(&cpg);
- au_unpin(&pin);
- }
- if (unlikely(err))
- goto out_children;
- a->src_bstart = a->btgt;
- a->src_h_dentry = au_h_dptr(a->src_dentry, a->btgt);
- au_fset_ren(a->flags, WHSRC);
- }
-
- /* lock them all */
- err = au_ren_lock(a);
- if (unlikely(err))
- /* leave the copied-up one */
- goto out_children;
-
- if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE))
- err = au_may_ren(a);
- else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN))
- err = -ENAMETOOLONG;
- if (unlikely(err))
- goto out_hdir;
-
- /* store timestamps to be revertible */
- au_ren_dt(a);
-
- /* here we go */
- err = do_rename(a);
- if (unlikely(err))
- goto out_dt;
-
- /* update dir attributes */
- au_ren_refresh_dir(a);
-
- /* dput/iput all lower dentries */
- au_ren_refresh(a);
-
- goto out_hdir; /* success */
-
-out_dt:
- au_ren_rev_dt(err, a);
-out_hdir:
- au_ren_unlock(a);
-out_children:
- au_nhash_wh_free(&a->whlist);
- if (err && a->dst_inode && a->dst_bstart != a->btgt) {
- AuDbg("bstart %d, btgt %d\n", a->dst_bstart, a->btgt);
- au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
- au_set_dbstart(a->dst_dentry, a->dst_bstart);
- }
-out_parent:
- if (!err)
- d_move(a->src_dentry, a->dst_dentry);
- else {
- au_update_dbstart(a->dst_dentry);
- if (!a->dst_inode)
- d_drop(a->dst_dentry);
- }
- if (au_ftest_ren(a->flags, ISSAMEDIR))
- di_write_unlock(a->dst_parent);
- else
- di_write_unlock2(a->src_parent, a->dst_parent);
-out_unlock:
- aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
-out_free:
- iput(a->dst_inode);
- if (a->thargs)
- au_whtmp_rmdir_free(a->thargs);
- kfree(a);
-out:
- AuTraceErr(err);
- return err;
-}
diff --git a/fs/aufs/iinfo.c b/fs/aufs/iinfo.c
deleted file mode 100644
index 67ef672a0..000000000
--- a/fs/aufs/iinfo.c
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * inode private data
- */
-
-#include "aufs.h"
-
-struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
-{
- struct inode *h_inode;
-
- IiMustAnyLock(inode);
-
- h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode;
- AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
- return h_inode;
-}
-
-/* todo: hard/soft set? */
-void au_hiput(struct au_hinode *hinode)
-{
- au_hn_free(hinode);
- dput(hinode->hi_whdentry);
- iput(hinode->hi_inode);
-}
-
-unsigned int au_hi_flags(struct inode *inode, int isdir)
-{
- unsigned int flags;
- const unsigned int mnt_flags = au_mntflags(inode->i_sb);
-
- flags = 0;
- if (au_opt_test(mnt_flags, XINO))
- au_fset_hi(flags, XINO);
- if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY))
- au_fset_hi(flags, HNOTIFY);
- return flags;
-}
-
-void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
- struct inode *h_inode, unsigned int flags)
-{
- struct au_hinode *hinode;
- struct inode *hi;
- struct au_iinfo *iinfo = au_ii(inode);
-
- IiMustWriteLock(inode);
-
- hinode = iinfo->ii_hinode + bindex;
- hi = hinode->hi_inode;
- AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
-
- if (hi)
- au_hiput(hinode);
- hinode->hi_inode = h_inode;
- if (h_inode) {
- int err;
- struct super_block *sb = inode->i_sb;
- struct au_branch *br;
-
- AuDebugOn(inode->i_mode
- && (h_inode->i_mode & S_IFMT)
- != (inode->i_mode & S_IFMT));
- if (bindex == iinfo->ii_bstart)
- au_cpup_igen(inode, h_inode);
- br = au_sbr(sb, bindex);
- hinode->hi_id = br->br_id;
- if (au_ftest_hi(flags, XINO)) {
- err = au_xino_write(sb, bindex, h_inode->i_ino,
- inode->i_ino);
- if (unlikely(err))
- AuIOErr1("failed au_xino_write() %d\n", err);
- }
-
- if (au_ftest_hi(flags, HNOTIFY)
- && au_br_hnotifyable(br->br_perm)) {
- err = au_hn_alloc(hinode, inode);
- if (unlikely(err))
- AuIOErr1("au_hn_alloc() %d\n", err);
- }
- }
-}
-
-void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
- struct dentry *h_wh)
-{
- struct au_hinode *hinode;
-
- IiMustWriteLock(inode);
-
- hinode = au_ii(inode)->ii_hinode + bindex;
- AuDebugOn(hinode->hi_whdentry);
- hinode->hi_whdentry = h_wh;
-}
-
-void au_update_iigen(struct inode *inode, int half)
-{
- struct au_iinfo *iinfo;
- struct au_iigen *iigen;
- unsigned int sigen;
-
- sigen = au_sigen(inode->i_sb);
- iinfo = au_ii(inode);
- iigen = &iinfo->ii_generation;
- spin_lock(&iigen->ig_spin);
- iigen->ig_generation = sigen;
- if (half)
- au_ig_fset(iigen->ig_flags, HALF_REFRESHED);
- else
- au_ig_fclr(iigen->ig_flags, HALF_REFRESHED);
- spin_unlock(&iigen->ig_spin);
-}
-
-/* it may be called at remount time, too */
-void au_update_ibrange(struct inode *inode, int do_put_zero)
-{
- struct au_iinfo *iinfo;
- aufs_bindex_t bindex, bend;
-
- iinfo = au_ii(inode);
- if (!iinfo)
- return;
-
- IiMustWriteLock(inode);
-
- if (do_put_zero && iinfo->ii_bstart >= 0) {
- for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
- bindex++) {
- struct inode *h_i;
-
- h_i = iinfo->ii_hinode[0 + bindex].hi_inode;
- if (h_i
- && !h_i->i_nlink
- && !(h_i->i_state & I_LINKABLE))
- au_set_h_iptr(inode, bindex, NULL, 0);
- }
- }
-
- iinfo->ii_bstart = -1;
- iinfo->ii_bend = -1;
- bend = au_sbend(inode->i_sb);
- for (bindex = 0; bindex <= bend; bindex++)
- if (iinfo->ii_hinode[0 + bindex].hi_inode) {
- iinfo->ii_bstart = bindex;
- break;
- }
- if (iinfo->ii_bstart >= 0)
- for (bindex = bend; bindex >= iinfo->ii_bstart; bindex--)
- if (iinfo->ii_hinode[0 + bindex].hi_inode) {
- iinfo->ii_bend = bindex;
- break;
- }
- AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend);
-}
-
-/* ---------------------------------------------------------------------- */
-
-void au_icntnr_init_once(void *_c)
-{
- struct au_icntnr *c = _c;
- struct au_iinfo *iinfo = &c->iinfo;
- static struct lock_class_key aufs_ii;
-
- spin_lock_init(&iinfo->ii_generation.ig_spin);
- au_rw_init(&iinfo->ii_rwsem);
- au_rw_class(&iinfo->ii_rwsem, &aufs_ii);
- inode_init_once(&c->vfs_inode);
-}
-
-int au_iinfo_init(struct inode *inode)
-{
- struct au_iinfo *iinfo;
- struct super_block *sb;
- int nbr, i;
-
- sb = inode->i_sb;
- iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
- nbr = au_sbend(sb) + 1;
- if (unlikely(nbr <= 0))
- nbr = 1;
- iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
- if (iinfo->ii_hinode) {
- au_ninodes_inc(sb);
- for (i = 0; i < nbr; i++)
- iinfo->ii_hinode[i].hi_id = -1;
-
- iinfo->ii_generation.ig_generation = au_sigen(sb);
- iinfo->ii_bstart = -1;
- iinfo->ii_bend = -1;
- iinfo->ii_vdir = NULL;
- return 0;
- }
- return -ENOMEM;
-}
-
-int au_ii_realloc(struct au_iinfo *iinfo, int nbr)
-{
- int err, sz;
- struct au_hinode *hip;
-
- AuRwMustWriteLock(&iinfo->ii_rwsem);
-
- err = -ENOMEM;
- sz = sizeof(*hip) * (iinfo->ii_bend + 1);
- if (!sz)
- sz = sizeof(*hip);
- hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS);
- if (hip) {
- iinfo->ii_hinode = hip;
- err = 0;
- }
-
- return err;
-}
-
-void au_iinfo_fin(struct inode *inode)
-{
- struct au_iinfo *iinfo;
- struct au_hinode *hi;
- struct super_block *sb;
- aufs_bindex_t bindex, bend;
- const unsigned char unlinked = !inode->i_nlink;
-
- iinfo = au_ii(inode);
- /* bad_inode case */
- if (!iinfo)
- return;
-
- sb = inode->i_sb;
- au_ninodes_dec(sb);
- if (si_pid_test(sb))
- au_xino_delete_inode(inode, unlinked);
- else {
- /*
- * it is safe to hide the dependency between sbinfo and
- * sb->s_umount.
- */
- lockdep_off();
- si_noflush_read_lock(sb);
- au_xino_delete_inode(inode, unlinked);
- si_read_unlock(sb);
- lockdep_on();
- }
-
- if (iinfo->ii_vdir)
- au_vdir_free(iinfo->ii_vdir);
-
- bindex = iinfo->ii_bstart;
- if (bindex >= 0) {
- hi = iinfo->ii_hinode + bindex;
- bend = iinfo->ii_bend;
- while (bindex++ <= bend) {
- if (hi->hi_inode)
- au_hiput(hi);
- hi++;
- }
- }
- kfree(iinfo->ii_hinode);
- iinfo->ii_hinode = NULL;
- AuRwDestroy(&iinfo->ii_rwsem);
-}
diff --git a/fs/aufs/inode.c b/fs/aufs/inode.c
deleted file mode 100644
index 5a87727ba..000000000
--- a/fs/aufs/inode.c
+++ /dev/null
@@ -1,514 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * inode functions
- */
-
-#include "aufs.h"
-
-struct inode *au_igrab(struct inode *inode)
-{
- if (inode) {
- AuDebugOn(!atomic_read(&inode->i_count));
- ihold(inode);
- }
- return inode;
-}
-
-static void au_refresh_hinode_attr(struct inode *inode, int do_version)
-{
- au_cpup_attr_all(inode, /*force*/0);
- au_update_iigen(inode, /*half*/1);
- if (do_version)
- inode->i_version++;
-}
-
-static int au_ii_refresh(struct inode *inode, int *update)
-{
- int err, e;
- umode_t type;
- aufs_bindex_t bindex, new_bindex;
- struct super_block *sb;
- struct au_iinfo *iinfo;
- struct au_hinode *p, *q, tmp;
-
- IiMustWriteLock(inode);
-
- *update = 0;
- sb = inode->i_sb;
- type = inode->i_mode & S_IFMT;
- iinfo = au_ii(inode);
- err = au_ii_realloc(iinfo, au_sbend(sb) + 1);
- if (unlikely(err))
- goto out;
-
- AuDebugOn(iinfo->ii_bstart < 0);
- p = iinfo->ii_hinode + iinfo->ii_bstart;
- for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
- bindex++, p++) {
- if (!p->hi_inode)
- continue;
-
- AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT));
- new_bindex = au_br_index(sb, p->hi_id);
- if (new_bindex == bindex)
- continue;
-
- if (new_bindex < 0) {
- *update = 1;
- au_hiput(p);
- p->hi_inode = NULL;
- continue;
- }
-
- if (new_bindex < iinfo->ii_bstart)
- iinfo->ii_bstart = new_bindex;
- if (iinfo->ii_bend < new_bindex)
- iinfo->ii_bend = new_bindex;
- /* swap two lower inode, and loop again */
- q = iinfo->ii_hinode + new_bindex;
- tmp = *q;
- *q = *p;
- *p = tmp;
- if (tmp.hi_inode) {
- bindex--;
- p--;
- }
- }
- au_update_ibrange(inode, /*do_put_zero*/0);
- e = au_dy_irefresh(inode);
- if (unlikely(e && !err))
- err = e;
-
-out:
- AuTraceErr(err);
- return err;
-}
-
-void au_refresh_iop(struct inode *inode, int force_getattr)
-{
- int type;
- struct au_sbinfo *sbi = au_sbi(inode->i_sb);
- const struct inode_operations *iop
- = force_getattr ? aufs_iop : sbi->si_iop_array;
-
- if (inode->i_op == iop)
- return;
-
- switch (inode->i_mode & S_IFMT) {
- case S_IFDIR:
- type = AuIop_DIR;
- break;
- case S_IFLNK:
- type = AuIop_SYMLINK;
- break;
- default:
- type = AuIop_OTHER;
- break;
- }
-
- inode->i_op = iop + type;
- /* unnecessary smp_wmb() */
-}
-
-int au_refresh_hinode_self(struct inode *inode)
-{
- int err, update;
-
- err = au_ii_refresh(inode, &update);
- if (!err)
- au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
-
- AuTraceErr(err);
- return err;
-}
-
-int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
-{
- int err, e, update;
- unsigned int flags;
- umode_t mode;
- aufs_bindex_t bindex, bend;
- unsigned char isdir;
- struct au_hinode *p;
- struct au_iinfo *iinfo;
-
- err = au_ii_refresh(inode, &update);
- if (unlikely(err))
- goto out;
-
- update = 0;
- iinfo = au_ii(inode);
- p = iinfo->ii_hinode + iinfo->ii_bstart;
- mode = (inode->i_mode & S_IFMT);
- isdir = S_ISDIR(mode);
- flags = au_hi_flags(inode, isdir);
- bend = au_dbend(dentry);
- for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
- struct inode *h_i, *h_inode;
- struct dentry *h_d;
-
- h_d = au_h_dptr(dentry, bindex);
- if (!h_d || d_is_negative(h_d))
- continue;
-
- h_inode = d_inode(h_d);
- AuDebugOn(mode != (h_inode->i_mode & S_IFMT));
- if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) {
- h_i = au_h_iptr(inode, bindex);
- if (h_i) {
- if (h_i == h_inode)
- continue;
- err = -EIO;
- break;
- }
- }
- if (bindex < iinfo->ii_bstart)
- iinfo->ii_bstart = bindex;
- if (iinfo->ii_bend < bindex)
- iinfo->ii_bend = bindex;
- au_set_h_iptr(inode, bindex, au_igrab(h_inode), flags);
- update = 1;
- }
- au_update_ibrange(inode, /*do_put_zero*/0);
- e = au_dy_irefresh(inode);
- if (unlikely(e && !err))
- err = e;
- if (!err)
- au_refresh_hinode_attr(inode, update && isdir);
-
-out:
- AuTraceErr(err);
- return err;
-}
-
-static int set_inode(struct inode *inode, struct dentry *dentry)
-{
- int err;
- unsigned int flags;
- umode_t mode;
- aufs_bindex_t bindex, bstart, btail;
- unsigned char isdir;
- struct dentry *h_dentry;
- struct inode *h_inode;
- struct au_iinfo *iinfo;
- struct inode_operations *iop;
-
- IiMustWriteLock(inode);
-
- err = 0;
- isdir = 0;
- iop = au_sbi(inode->i_sb)->si_iop_array;
- bstart = au_dbstart(dentry);
- h_dentry = au_h_dptr(dentry, bstart);
- h_inode = d_inode(h_dentry);
- mode = h_inode->i_mode;
- switch (mode & S_IFMT) {
- case S_IFREG:
- btail = au_dbtail(dentry);
- inode->i_op = iop + AuIop_OTHER;
- inode->i_fop = &aufs_file_fop;
- err = au_dy_iaop(inode, bstart, h_inode);
- if (unlikely(err))
- goto out;
- break;
- case S_IFDIR:
- isdir = 1;
- btail = au_dbtaildir(dentry);
- inode->i_op = iop + AuIop_DIR;
- inode->i_fop = &aufs_dir_fop;
- break;
- case S_IFLNK:
- btail = au_dbtail(dentry);
- inode->i_op = iop + AuIop_SYMLINK;
- break;
- case S_IFBLK:
- case S_IFCHR:
- case S_IFIFO:
- case S_IFSOCK:
- btail = au_dbtail(dentry);
- inode->i_op = iop + AuIop_OTHER;
- init_special_inode(inode, mode, h_inode->i_rdev);
- break;
- default:
- AuIOErr("Unknown file type 0%o\n", mode);
- err = -EIO;
- goto out;
- }
-
- /* do not set hnotify for whiteouted dirs (SHWH mode) */
- flags = au_hi_flags(inode, isdir);
- if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
- && au_ftest_hi(flags, HNOTIFY)
- && dentry->d_name.len > AUFS_WH_PFX_LEN
- && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
- au_fclr_hi(flags, HNOTIFY);
- iinfo = au_ii(inode);
- iinfo->ii_bstart = bstart;
- iinfo->ii_bend = btail;
- for (bindex = bstart; bindex <= btail; bindex++) {
- h_dentry = au_h_dptr(dentry, bindex);
- if (h_dentry)
- au_set_h_iptr(inode, bindex,
- au_igrab(d_inode(h_dentry)), flags);
- }
- au_cpup_attr_all(inode, /*force*/1);
- /*
- * to force calling aufs_get_acl() every time,
- * do not call cache_no_acl() for aufs inode.
- */
-
-out:
- return err;
-}
-
-/*
- * successful returns with iinfo write_locked
- * minus: errno
- * zero: success, matched
- * plus: no error, but unmatched
- */
-static int reval_inode(struct inode *inode, struct dentry *dentry)
-{
- int err;
- unsigned int gen, igflags;
- aufs_bindex_t bindex, bend;
- struct inode *h_inode, *h_dinode;
- struct dentry *h_dentry;
-
- /*
- * before this function, if aufs got any iinfo lock, it must be only
- * one, the parent dir.
- * it can happen by UDBA and the obsoleted inode number.
- */
- err = -EIO;
- if (unlikely(inode->i_ino == parent_ino(dentry)))
- goto out;
-
- err = 1;
- ii_write_lock_new_child(inode);
- h_dentry = au_h_dptr(dentry, au_dbstart(dentry));
- h_dinode = d_inode(h_dentry);
- bend = au_ibend(inode);
- for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
- h_inode = au_h_iptr(inode, bindex);
- if (!h_inode || h_inode != h_dinode)
- continue;
-
- err = 0;
- gen = au_iigen(inode, &igflags);
- if (gen == au_digen(dentry)
- && !au_ig_ftest(igflags, HALF_REFRESHED))
- break;
-
- /* fully refresh inode using dentry */
- err = au_refresh_hinode(inode, dentry);
- if (!err)
- au_update_iigen(inode, /*half*/0);
- break;
- }
-
- if (unlikely(err))
- ii_write_unlock(inode);
-out:
- return err;
-}
-
-int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
- unsigned int d_type, ino_t *ino)
-{
- int err;
- struct mutex *mtx;
-
- /* prevent hardlinked inode number from race condition */
- mtx = NULL;
- if (d_type != DT_DIR) {
- mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx;
- mutex_lock(mtx);
- }
- err = au_xino_read(sb, bindex, h_ino, ino);
- if (unlikely(err))
- goto out;
-
- if (!*ino) {
- err = -EIO;
- *ino = au_xino_new_ino(sb);
- if (unlikely(!*ino))
- goto out;
- err = au_xino_write(sb, bindex, h_ino, *ino);
- if (unlikely(err))
- goto out;
- }
-
-out:
- if (mtx)
- mutex_unlock(mtx);
- return err;
-}
-
-/* successful returns with iinfo write_locked */
-/* todo: return with unlocked? */
-struct inode *au_new_inode(struct dentry *dentry, int must_new)
-{
- struct inode *inode, *h_inode;
- struct dentry *h_dentry;
- struct super_block *sb;
- struct mutex *mtx;
- ino_t h_ino, ino;
- int err;
- aufs_bindex_t bstart;
-
- sb = dentry->d_sb;
- bstart = au_dbstart(dentry);
- h_dentry = au_h_dptr(dentry, bstart);
- h_inode = d_inode(h_dentry);
- h_ino = h_inode->i_ino;
-
- /*
- * stop 'race'-ing between hardlinks under different
- * parents.
- */
- mtx = NULL;
- if (!d_is_dir(h_dentry))
- mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx;
-
-new_ino:
- if (mtx)
- mutex_lock(mtx);
- err = au_xino_read(sb, bstart, h_ino, &ino);
- inode = ERR_PTR(err);
- if (unlikely(err))
- goto out;
-
- if (!ino) {
- ino = au_xino_new_ino(sb);
- if (unlikely(!ino)) {
- inode = ERR_PTR(-EIO);
- goto out;
- }
- }
-
- AuDbg("i%lu\n", (unsigned long)ino);
- inode = au_iget_locked(sb, ino);
- err = PTR_ERR(inode);
- if (IS_ERR(inode))
- goto out;
-
- AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
- if (inode->i_state & I_NEW) {
- /* verbose coding for lock class name */
- if (unlikely(d_is_symlink(h_dentry)))
- au_rw_class(&au_ii(inode)->ii_rwsem,
- au_lc_key + AuLcSymlink_IIINFO);
- else if (unlikely(d_is_dir(h_dentry)))
- au_rw_class(&au_ii(inode)->ii_rwsem,
- au_lc_key + AuLcDir_IIINFO);
- else /* likely */
- au_rw_class(&au_ii(inode)->ii_rwsem,
- au_lc_key + AuLcNonDir_IIINFO);
-
- ii_write_lock_new_child(inode);
- err = set_inode(inode, dentry);
- if (!err) {
- unlock_new_inode(inode);
- goto out; /* success */
- }
-
- /*
- * iget_failed() calls iput(), but we need to call
- * ii_write_unlock() after iget_failed(). so dirty hack for
- * i_count.
- */
- atomic_inc(&inode->i_count);
- iget_failed(inode);
- ii_write_unlock(inode);
- au_xino_write(sb, bstart, h_ino, /*ino*/0);
- /* ignore this error */
- goto out_iput;
- } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) {
- /*
- * horrible race condition between lookup, readdir and copyup
- * (or something).
- */
- if (mtx)
- mutex_unlock(mtx);
- err = reval_inode(inode, dentry);
- if (unlikely(err < 0)) {
- mtx = NULL;
- goto out_iput;
- }
-
- if (!err) {
- mtx = NULL;
- goto out; /* success */
- } else if (mtx)
- mutex_lock(mtx);
- }
-
- if (unlikely(au_test_fs_unique_ino(h_inode)))
- AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
- " b%d, %s, %pd, hi%lu, i%lu.\n",
- bstart, au_sbtype(h_dentry->d_sb), dentry,
- (unsigned long)h_ino, (unsigned long)ino);
- ino = 0;
- err = au_xino_write(sb, bstart, h_ino, /*ino*/0);
- if (!err) {
- iput(inode);
- if (mtx)
- mutex_unlock(mtx);
- goto new_ino;
- }
-
-out_iput:
- iput(inode);
- inode = ERR_PTR(err);
-out:
- if (mtx)
- mutex_unlock(mtx);
- return inode;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
- struct inode *inode)
-{
- int err;
- struct inode *hi;
-
- err = au_br_rdonly(au_sbr(sb, bindex));
-
- /* pseudo-link after flushed may happen out of bounds */
- if (!err
- && inode
- && au_ibstart(inode) <= bindex
- && bindex <= au_ibend(inode)) {
- /*
- * permission check is unnecessary since vfsub routine
- * will be called later
- */
- hi = au_h_iptr(inode, bindex);
- if (hi)
- err = IS_IMMUTABLE(hi) ? -EROFS : 0;
- }
-
- return err;
-}
-
-int au_test_h_perm(struct inode *h_inode, int mask)
-{
- if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
- return 0;
- return inode_permission(h_inode, mask);
-}
-
-int au_test_h_perm_sio(struct inode *h_inode, int mask)
-{
- if (au_test_nfs(h_inode->i_sb)
- && (mask & MAY_WRITE)
- && S_ISDIR(h_inode->i_mode))
- mask |= MAY_READ; /* force permission check */
- return au_test_h_perm(h_inode, mask);
-}
diff --git a/fs/aufs/inode.h b/fs/aufs/inode.h
deleted file mode 100644
index 534b9e814..000000000
--- a/fs/aufs/inode.h
+++ /dev/null
@@ -1,672 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * inode operations
- */
-
-#ifndef __AUFS_INODE_H__
-#define __AUFS_INODE_H__
-
-#ifdef __KERNEL__
-
-#include <linux/fsnotify.h>
-#include "rwsem.h"
-
-struct vfsmount;
-
-struct au_hnotify {
-#ifdef CONFIG_AUFS_HNOTIFY
-#ifdef CONFIG_AUFS_HFSNOTIFY
- /* never use fsnotify_add_vfsmount_mark() */
- struct fsnotify_mark hn_mark;
-#endif
- struct inode *hn_aufs_inode; /* no get/put */
-#endif
-} ____cacheline_aligned_in_smp;
-
-struct au_hinode {
- struct inode *hi_inode;
- aufs_bindex_t hi_id;
-#ifdef CONFIG_AUFS_HNOTIFY
- struct au_hnotify *hi_notify;
-#endif
-
- /* reference to the copied-up whiteout with get/put */
- struct dentry *hi_whdentry;
-};
-
-/* ig_flags */
-#define AuIG_HALF_REFRESHED 1
-#define au_ig_ftest(flags, name) ((flags) & AuIG_##name)
-#define au_ig_fset(flags, name) \
- do { (flags) |= AuIG_##name; } while (0)
-#define au_ig_fclr(flags, name) \
- do { (flags) &= ~AuIG_##name; } while (0)
-
-struct au_iigen {
- spinlock_t ig_spin;
- __u32 ig_generation, ig_flags;
-};
-
-struct au_vdir;
-struct au_iinfo {
- struct au_iigen ii_generation;
- struct super_block *ii_hsb1; /* no get/put */
-
- struct au_rwsem ii_rwsem;
- aufs_bindex_t ii_bstart, ii_bend;
- __u32 ii_higen;
- struct au_hinode *ii_hinode;
- struct au_vdir *ii_vdir;
-};
-
-struct au_icntnr {
- struct au_iinfo iinfo;
- struct inode vfs_inode;
-} ____cacheline_aligned_in_smp;
-
-/* au_pin flags */
-#define AuPin_DI_LOCKED 1
-#define AuPin_MNT_WRITE (1 << 1)
-#define au_ftest_pin(flags, name) ((flags) & AuPin_##name)
-#define au_fset_pin(flags, name) \
- do { (flags) |= AuPin_##name; } while (0)
-#define au_fclr_pin(flags, name) \
- do { (flags) &= ~AuPin_##name; } while (0)
-
-struct au_pin {
- /* input */
- struct dentry *dentry;
- unsigned int udba;
- unsigned char lsc_di, lsc_hi, flags;
- aufs_bindex_t bindex;
-
- /* output */
- struct dentry *parent;
- struct au_hinode *hdir;
- struct vfsmount *h_mnt;
-
- /* temporary unlock/relock for copyup */
- struct dentry *h_dentry, *h_parent;
- struct au_branch *br;
- struct task_struct *task;
-};
-
-void au_pin_hdir_unlock(struct au_pin *p);
-int au_pin_hdir_lock(struct au_pin *p);
-int au_pin_hdir_relock(struct au_pin *p);
-void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task);
-void au_pin_hdir_acquire_nest(struct au_pin *p);
-void au_pin_hdir_release(struct au_pin *p);
-
-/* ---------------------------------------------------------------------- */
-
-static inline struct au_iinfo *au_ii(struct inode *inode)
-{
- struct au_iinfo *iinfo;
-
- iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
- if (iinfo->ii_hinode)
- return iinfo;
- return NULL; /* debugging bad_inode case */
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* inode.c */
-struct inode *au_igrab(struct inode *inode);
-void au_refresh_iop(struct inode *inode, int force_getattr);
-int au_refresh_hinode_self(struct inode *inode);
-int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
-int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
- unsigned int d_type, ino_t *ino);
-struct inode *au_new_inode(struct dentry *dentry, int must_new);
-int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
- struct inode *inode);
-int au_test_h_perm(struct inode *h_inode, int mask);
-int au_test_h_perm_sio(struct inode *h_inode, int mask);
-
-static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
- ino_t h_ino, unsigned int d_type, ino_t *ino)
-{
-#ifdef CONFIG_AUFS_SHWH
- return au_ino(sb, bindex, h_ino, d_type, ino);
-#else
- return 0;
-#endif
-}
-
-/* i_op.c */
-enum {
- AuIop_SYMLINK,
- AuIop_DIR,
- AuIop_OTHER,
- AuIop_Last
-};
-extern struct inode_operations aufs_iop[AuIop_Last],
- aufs_iop_nogetattr[AuIop_Last];
-
-/* au_wr_dir flags */
-#define AuWrDir_ADD_ENTRY 1
-#define AuWrDir_ISDIR (1 << 1)
-#define AuWrDir_TMPFILE (1 << 2)
-#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name)
-#define au_fset_wrdir(flags, name) \
- do { (flags) |= AuWrDir_##name; } while (0)
-#define au_fclr_wrdir(flags, name) \
- do { (flags) &= ~AuWrDir_##name; } while (0)
-
-struct au_wr_dir_args {
- aufs_bindex_t force_btgt;
- unsigned char flags;
-};
-int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
- struct au_wr_dir_args *args);
-
-struct dentry *au_pinned_h_parent(struct au_pin *pin);
-void au_pin_init(struct au_pin *pin, struct dentry *dentry,
- aufs_bindex_t bindex, int lsc_di, int lsc_hi,
- unsigned int udba, unsigned char flags);
-int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
- unsigned int udba, unsigned char flags) __must_check;
-int au_do_pin(struct au_pin *pin) __must_check;
-void au_unpin(struct au_pin *pin);
-int au_reval_for_attr(struct dentry *dentry, unsigned int sigen);
-
-#define AuIcpup_DID_CPUP 1
-#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name)
-#define au_fset_icpup(flags, name) \
- do { (flags) |= AuIcpup_##name; } while (0)
-#define au_fclr_icpup(flags, name) \
- do { (flags) &= ~AuIcpup_##name; } while (0)
-
-struct au_icpup_args {
- unsigned char flags;
- unsigned char pin_flags;
- aufs_bindex_t btgt;
- unsigned int udba;
- struct au_pin pin;
- struct path h_path;
- struct inode *h_inode;
-};
-
-int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
- struct au_icpup_args *a);
-
-int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path);
-
-/* i_op_add.c */
-int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
- struct dentry *h_parent, int isdir);
-int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
- dev_t dev);
-int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
-int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool want_excl);
-struct vfsub_aopen_args;
-int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
- struct vfsub_aopen_args *args);
-int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode);
-int aufs_link(struct dentry *src_dentry, struct inode *dir,
- struct dentry *dentry);
-int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
-
-/* i_op_del.c */
-int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
-int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
- struct dentry *h_parent, int isdir);
-int aufs_unlink(struct inode *dir, struct dentry *dentry);
-int aufs_rmdir(struct inode *dir, struct dentry *dentry);
-
-/* i_op_ren.c */
-int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
-int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
- struct inode *dir, struct dentry *dentry);
-
-/* iinfo.c */
-struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
-void au_hiput(struct au_hinode *hinode);
-void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
- struct dentry *h_wh);
-unsigned int au_hi_flags(struct inode *inode, int isdir);
-
-/* hinode flags */
-#define AuHi_XINO 1
-#define AuHi_HNOTIFY (1 << 1)
-#define au_ftest_hi(flags, name) ((flags) & AuHi_##name)
-#define au_fset_hi(flags, name) \
- do { (flags) |= AuHi_##name; } while (0)
-#define au_fclr_hi(flags, name) \
- do { (flags) &= ~AuHi_##name; } while (0)
-
-#ifndef CONFIG_AUFS_HNOTIFY
-#undef AuHi_HNOTIFY
-#define AuHi_HNOTIFY 0
-#endif
-
-void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
- struct inode *h_inode, unsigned int flags);
-
-void au_update_iigen(struct inode *inode, int half);
-void au_update_ibrange(struct inode *inode, int do_put_zero);
-
-void au_icntnr_init_once(void *_c);
-int au_iinfo_init(struct inode *inode);
-void au_iinfo_fin(struct inode *inode);
-int au_ii_realloc(struct au_iinfo *iinfo, int nbr);
-
-#ifdef CONFIG_PROC_FS
-/* plink.c */
-int au_plink_maint(struct super_block *sb, int flags);
-struct au_sbinfo;
-void au_plink_maint_leave(struct au_sbinfo *sbinfo);
-int au_plink_maint_enter(struct super_block *sb);
-#ifdef CONFIG_AUFS_DEBUG
-void au_plink_list(struct super_block *sb);
-#else
-AuStubVoid(au_plink_list, struct super_block *sb)
-#endif
-int au_plink_test(struct inode *inode);
-struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
-void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
- struct dentry *h_dentry);
-void au_plink_put(struct super_block *sb, int verbose);
-void au_plink_clean(struct super_block *sb, int verbose);
-void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
-#else
-AuStubInt0(au_plink_maint, struct super_block *sb, int flags);
-AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo);
-AuStubInt0(au_plink_maint_enter, struct super_block *sb);
-AuStubVoid(au_plink_list, struct super_block *sb);
-AuStubInt0(au_plink_test, struct inode *inode);
-AuStub(struct dentry *, au_plink_lkup, return NULL,
- struct inode *inode, aufs_bindex_t bindex);
-AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex,
- struct dentry *h_dentry);
-AuStubVoid(au_plink_put, struct super_block *sb, int verbose);
-AuStubVoid(au_plink_clean, struct super_block *sb, int verbose);
-AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id);
-#endif /* CONFIG_PROC_FS */
-
-#ifdef CONFIG_AUFS_XATTR
-/* xattr.c */
-int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
- unsigned int verbose);
-ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size);
-ssize_t aufs_getxattr(struct dentry *dentry, const char *name, void *value,
- size_t size);
-int aufs_setxattr(struct dentry *dentry, const char *name, const void *value,
- size_t size, int flags);
-int aufs_removexattr(struct dentry *dentry, const char *name);
-
-/* void au_xattr_init(struct super_block *sb); */
-#else
-AuStubInt0(au_cpup_xattr, struct dentry *h_dst, struct dentry *h_src,
- int ignore_flags, unsigned int verbose);
-/* AuStubVoid(au_xattr_init, struct super_block *sb); */
-#endif
-
-#ifdef CONFIG_FS_POSIX_ACL
-struct posix_acl *aufs_get_acl(struct inode *inode, int type);
-int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
-#endif
-
-#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
-enum {
- AU_XATTR_SET,
- AU_XATTR_REMOVE,
- AU_ACL_SET
-};
-
-struct au_srxattr {
- int type;
- union {
- struct {
- const char *name;
- const void *value;
- size_t size;
- int flags;
- } set;
- struct {
- const char *name;
- } remove;
- struct {
- struct posix_acl *acl;
- int type;
- } acl_set;
- } u;
-};
-ssize_t au_srxattr(struct dentry *dentry, struct au_srxattr *arg);
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-/* lock subclass for iinfo */
-enum {
- AuLsc_II_CHILD, /* child first */
- AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */
- AuLsc_II_CHILD3, /* copyup dirs */
- AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */
- AuLsc_II_PARENT2,
- AuLsc_II_PARENT3, /* copyup dirs */
- AuLsc_II_NEW_CHILD
-};
-
-/*
- * ii_read_lock_child, ii_write_lock_child,
- * ii_read_lock_child2, ii_write_lock_child2,
- * ii_read_lock_child3, ii_write_lock_child3,
- * ii_read_lock_parent, ii_write_lock_parent,
- * ii_read_lock_parent2, ii_write_lock_parent2,
- * ii_read_lock_parent3, ii_write_lock_parent3,
- * ii_read_lock_new_child, ii_write_lock_new_child,
- */
-#define AuReadLockFunc(name, lsc) \
-static inline void ii_read_lock_##name(struct inode *i) \
-{ \
- au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
-}
-
-#define AuWriteLockFunc(name, lsc) \
-static inline void ii_write_lock_##name(struct inode *i) \
-{ \
- au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
-}
-
-#define AuRWLockFuncs(name, lsc) \
- AuReadLockFunc(name, lsc) \
- AuWriteLockFunc(name, lsc)
-
-AuRWLockFuncs(child, CHILD);
-AuRWLockFuncs(child2, CHILD2);
-AuRWLockFuncs(child3, CHILD3);
-AuRWLockFuncs(parent, PARENT);
-AuRWLockFuncs(parent2, PARENT2);
-AuRWLockFuncs(parent3, PARENT3);
-AuRWLockFuncs(new_child, NEW_CHILD);
-
-#undef AuReadLockFunc
-#undef AuWriteLockFunc
-#undef AuRWLockFuncs
-
-/*
- * ii_read_unlock, ii_write_unlock, ii_downgrade_lock
- */
-AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem);
-
-#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
-#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
-#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
-
-/* ---------------------------------------------------------------------- */
-
-static inline void au_icntnr_init(struct au_icntnr *c)
-{
-#ifdef CONFIG_AUFS_DEBUG
- c->vfs_inode.i_mode = 0;
-#endif
-}
-
-static inline unsigned int au_iigen(struct inode *inode, unsigned int *igflags)
-{
- unsigned int gen;
- struct au_iinfo *iinfo;
- struct au_iigen *iigen;
-
- iinfo = au_ii(inode);
- iigen = &iinfo->ii_generation;
- spin_lock(&iigen->ig_spin);
- if (igflags)
- *igflags = iigen->ig_flags;
- gen = iigen->ig_generation;
- spin_unlock(&iigen->ig_spin);
-
- return gen;
-}
-
-/* tiny test for inode number */
-/* tmpfs generation is too rough */
-static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
-{
- struct au_iinfo *iinfo;
-
- iinfo = au_ii(inode);
- AuRwMustAnyLock(&iinfo->ii_rwsem);
- return !(iinfo->ii_hsb1 == h_inode->i_sb
- && iinfo->ii_higen == h_inode->i_generation);
-}
-
-static inline void au_iigen_dec(struct inode *inode)
-{
- struct au_iinfo *iinfo;
- struct au_iigen *iigen;
-
- iinfo = au_ii(inode);
- iigen = &iinfo->ii_generation;
- spin_lock(&iigen->ig_spin);
- iigen->ig_generation--;
- spin_unlock(&iigen->ig_spin);
-}
-
-static inline int au_iigen_test(struct inode *inode, unsigned int sigen)
-{
- int err;
-
- err = 0;
- if (unlikely(inode && au_iigen(inode, NULL) != sigen))
- err = -EIO;
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
- aufs_bindex_t bindex)
-{
- IiMustAnyLock(inode);
- return au_ii(inode)->ii_hinode[0 + bindex].hi_id;
-}
-
-static inline aufs_bindex_t au_ibstart(struct inode *inode)
-{
- IiMustAnyLock(inode);
- return au_ii(inode)->ii_bstart;
-}
-
-static inline aufs_bindex_t au_ibend(struct inode *inode)
-{
- IiMustAnyLock(inode);
- return au_ii(inode)->ii_bend;
-}
-
-static inline struct au_vdir *au_ivdir(struct inode *inode)
-{
- IiMustAnyLock(inode);
- return au_ii(inode)->ii_vdir;
-}
-
-static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
-{
- IiMustAnyLock(inode);
- return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry;
-}
-
-static inline void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex)
-{
- IiMustWriteLock(inode);
- au_ii(inode)->ii_bstart = bindex;
-}
-
-static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex)
-{
- IiMustWriteLock(inode);
- au_ii(inode)->ii_bend = bindex;
-}
-
-static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
-{
- IiMustWriteLock(inode);
- au_ii(inode)->ii_vdir = vdir;
-}
-
-static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
-{
- IiMustAnyLock(inode);
- return au_ii(inode)->ii_hinode + bindex;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static inline struct dentry *au_pinned_parent(struct au_pin *pin)
-{
- if (pin)
- return pin->parent;
- return NULL;
-}
-
-static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
-{
- if (pin && pin->hdir)
- return pin->hdir->hi_inode;
- return NULL;
-}
-
-static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
-{
- if (pin)
- return pin->hdir;
- return NULL;
-}
-
-static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
-{
- if (pin)
- pin->dentry = dentry;
-}
-
-static inline void au_pin_set_parent_lflag(struct au_pin *pin,
- unsigned char lflag)
-{
- if (pin) {
- if (lflag)
- au_fset_pin(pin->flags, DI_LOCKED);
- else
- au_fclr_pin(pin->flags, DI_LOCKED);
- }
-}
-
-#if 0 /* reserved */
-static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
-{
- if (pin) {
- dput(pin->parent);
- pin->parent = dget(parent);
- }
-}
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-struct au_branch;
-#ifdef CONFIG_AUFS_HNOTIFY
-struct au_hnotify_op {
- void (*ctl)(struct au_hinode *hinode, int do_set);
- int (*alloc)(struct au_hinode *hinode);
-
- /*
- * if it returns true, the the caller should free hinode->hi_notify,
- * otherwise ->free() frees it.
- */
- int (*free)(struct au_hinode *hinode,
- struct au_hnotify *hn) __must_check;
-
- void (*fin)(void);
- int (*init)(void);
-
- int (*reset_br)(unsigned int udba, struct au_branch *br, int perm);
- void (*fin_br)(struct au_branch *br);
- int (*init_br)(struct au_branch *br, int perm);
-};
-
-/* hnotify.c */
-int au_hn_alloc(struct au_hinode *hinode, struct inode *inode);
-void au_hn_free(struct au_hinode *hinode);
-void au_hn_ctl(struct au_hinode *hinode, int do_set);
-void au_hn_reset(struct inode *inode, unsigned int flags);
-int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
- struct qstr *h_child_qstr, struct inode *h_child_inode);
-int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm);
-int au_hnotify_init_br(struct au_branch *br, int perm);
-void au_hnotify_fin_br(struct au_branch *br);
-int __init au_hnotify_init(void);
-void au_hnotify_fin(void);
-
-/* hfsnotify.c */
-extern const struct au_hnotify_op au_hnotify_op;
-
-static inline
-void au_hn_init(struct au_hinode *hinode)
-{
- hinode->hi_notify = NULL;
-}
-
-static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
-{
- return hinode->hi_notify;
-}
-
-#else
-AuStub(int, au_hn_alloc, return -EOPNOTSUPP,
- struct au_hinode *hinode __maybe_unused,
- struct inode *inode __maybe_unused)
-AuStub(struct au_hnotify *, au_hn, return NULL, struct au_hinode *hinode)
-AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused)
-AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused,
- int do_set __maybe_unused)
-AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused,
- unsigned int flags __maybe_unused)
-AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused,
- struct au_branch *br __maybe_unused,
- int perm __maybe_unused)
-AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused,
- int perm __maybe_unused)
-AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused)
-AuStubInt0(__init au_hnotify_init, void)
-AuStubVoid(au_hnotify_fin, void)
-AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused)
-#endif /* CONFIG_AUFS_HNOTIFY */
-
-static inline void au_hn_suspend(struct au_hinode *hdir)
-{
- au_hn_ctl(hdir, /*do_set*/0);
-}
-
-static inline void au_hn_resume(struct au_hinode *hdir)
-{
- au_hn_ctl(hdir, /*do_set*/1);
-}
-
-static inline void au_hn_imtx_lock(struct au_hinode *hdir)
-{
- mutex_lock(&hdir->hi_inode->i_mutex);
- au_hn_suspend(hdir);
-}
-
-static inline void au_hn_imtx_lock_nested(struct au_hinode *hdir,
- unsigned int sc __maybe_unused)
-{
- mutex_lock_nested(&hdir->hi_inode->i_mutex, sc);
- au_hn_suspend(hdir);
-}
-
-static inline void au_hn_imtx_unlock(struct au_hinode *hdir)
-{
- au_hn_resume(hdir);
- mutex_unlock(&hdir->hi_inode->i_mutex);
-}
-
-#endif /* __KERNEL__ */
-#endif /* __AUFS_INODE_H__ */
diff --git a/fs/aufs/ioctl.c b/fs/aufs/ioctl.c
deleted file mode 100644
index 6528fb911..000000000
--- a/fs/aufs/ioctl.c
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * ioctl
- * plink-management and readdir in userspace.
- * assist the pathconf(3) wrapper library.
- * move-down
- * File-based Hierarchical Storage Management.
- */
-
-#include <linux/compat.h>
-#include <linux/file.h>
-#include "aufs.h"
-
-static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg)
-{
- int err, fd;
- aufs_bindex_t wbi, bindex, bend;
- struct file *h_file;
- struct super_block *sb;
- struct dentry *root;
- struct au_branch *br;
- struct aufs_wbr_fd wbrfd = {
- .oflags = au_dir_roflags,
- .brid = -1
- };
- const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY
- | O_NOATIME | O_CLOEXEC;
-
- AuDebugOn(wbrfd.oflags & ~valid);
-
- if (arg) {
- err = copy_from_user(&wbrfd, arg, sizeof(wbrfd));
- if (unlikely(err)) {
- err = -EFAULT;
- goto out;
- }
-
- err = -EINVAL;
- AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid);
- wbrfd.oflags |= au_dir_roflags;
- AuDbg("0%o\n", wbrfd.oflags);
- if (unlikely(wbrfd.oflags & ~valid))
- goto out;
- }
-
- fd = get_unused_fd_flags(0);
- err = fd;
- if (unlikely(fd < 0))
- goto out;
-
- h_file = ERR_PTR(-EINVAL);
- wbi = 0;
- br = NULL;
- sb = path->dentry->d_sb;
- root = sb->s_root;
- aufs_read_lock(root, AuLock_IR);
- bend = au_sbend(sb);
- if (wbrfd.brid >= 0) {
- wbi = au_br_index(sb, wbrfd.brid);
- if (unlikely(wbi < 0 || wbi > bend))
- goto out_unlock;
- }
-
- h_file = ERR_PTR(-ENOENT);
- br = au_sbr(sb, wbi);
- if (!au_br_writable(br->br_perm)) {
- if (arg)
- goto out_unlock;
-
- bindex = wbi + 1;
- wbi = -1;
- for (; bindex <= bend; bindex++) {
- br = au_sbr(sb, bindex);
- if (au_br_writable(br->br_perm)) {
- wbi = bindex;
- br = au_sbr(sb, wbi);
- break;
- }
- }
- }
- AuDbg("wbi %d\n", wbi);
- if (wbi >= 0)
- h_file = au_h_open(root, wbi, wbrfd.oflags, NULL,
- /*force_wr*/0);
-
-out_unlock:
- aufs_read_unlock(root, AuLock_IR);
- err = PTR_ERR(h_file);
- if (IS_ERR(h_file))
- goto out_fd;
-
- atomic_dec(&br->br_count); /* cf. au_h_open() */
- fd_install(fd, h_file);
- err = fd;
- goto out; /* success */
-
-out_fd:
- put_unused_fd(fd);
-out:
- AuTraceErr(err);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
-{
- long err;
- struct dentry *dentry;
-
- switch (cmd) {
- case AUFS_CTL_RDU:
- case AUFS_CTL_RDU_INO:
- err = au_rdu_ioctl(file, cmd, arg);
- break;
-
- case AUFS_CTL_WBR_FD:
- err = au_wbr_fd(&file->f_path, (void __user *)arg);
- break;
-
- case AUFS_CTL_IBUSY:
- err = au_ibusy_ioctl(file, arg);
- break;
-
- case AUFS_CTL_BRINFO:
- err = au_brinfo_ioctl(file, arg);
- break;
-
- case AUFS_CTL_FHSM_FD:
- dentry = file->f_path.dentry;
- if (IS_ROOT(dentry))
- err = au_fhsm_fd(dentry->d_sb, arg);
- else
- err = -ENOTTY;
- break;
-
- default:
- /* do not call the lower */
- AuDbg("0x%x\n", cmd);
- err = -ENOTTY;
- }
-
- AuTraceErr(err);
- return err;
-}
-
-long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg)
-{
- long err;
-
- switch (cmd) {
- case AUFS_CTL_MVDOWN:
- err = au_mvdown(file->f_path.dentry, (void __user *)arg);
- break;
-
- case AUFS_CTL_WBR_FD:
- err = au_wbr_fd(&file->f_path, (void __user *)arg);
- break;
-
- default:
- /* do not call the lower */
- AuDbg("0x%x\n", cmd);
- err = -ENOTTY;
- }
-
- AuTraceErr(err);
- return err;
-}
-
-#ifdef CONFIG_COMPAT
-long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- long err;
-
- switch (cmd) {
- case AUFS_CTL_RDU:
- case AUFS_CTL_RDU_INO:
- err = au_rdu_compat_ioctl(file, cmd, arg);
- break;
-
- case AUFS_CTL_IBUSY:
- err = au_ibusy_compat_ioctl(file, arg);
- break;
-
- case AUFS_CTL_BRINFO:
- err = au_brinfo_compat_ioctl(file, arg);
- break;
-
- default:
- err = aufs_ioctl_dir(file, cmd, arg);
- }
-
- AuTraceErr(err);
- return err;
-}
-
-long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg));
-}
-#endif
diff --git a/fs/aufs/loop.c b/fs/aufs/loop.c
deleted file mode 100644
index 5711e7a2f..000000000
--- a/fs/aufs/loop.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * support for loopback block device as a branch
- */
-
-#include "aufs.h"
-
-/* added into drivers/block/loop.c */
-static struct file *(*backing_file_func)(struct super_block *sb);
-
-/*
- * test if two lower dentries have overlapping branches.
- */
-int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding)
-{
- struct super_block *h_sb;
- struct file *backing_file;
-
- if (unlikely(!backing_file_func)) {
- /* don't load "loop" module here */
- backing_file_func = symbol_get(loop_backing_file);
- if (unlikely(!backing_file_func))
- /* "loop" module is not loaded */
- return 0;
- }
-
- h_sb = h_adding->d_sb;
- backing_file = backing_file_func(h_sb);
- if (!backing_file)
- return 0;
-
- h_adding = backing_file->f_path.dentry;
- /*
- * h_adding can be local NFS.
- * in this case aufs cannot detect the loop.
- */
- if (unlikely(h_adding->d_sb == sb))
- return 1;
- return !!au_test_subdir(h_adding, sb->s_root);
-}
-
-/* true if a kernel thread named 'loop[0-9].*' accesses a file */
-int au_test_loopback_kthread(void)
-{
- int ret;
- struct task_struct *tsk = current;
- char c, comm[sizeof(tsk->comm)];
-
- ret = 0;
- if (tsk->flags & PF_KTHREAD) {
- get_task_comm(comm, tsk);
- c = comm[4];
- ret = ('0' <= c && c <= '9'
- && !strncmp(comm, "loop", 4));
- }
-
- return ret;
-}
-
-/* ---------------------------------------------------------------------- */
-
-#define au_warn_loopback_step 16
-static int au_warn_loopback_nelem = au_warn_loopback_step;
-static unsigned long *au_warn_loopback_array;
-
-void au_warn_loopback(struct super_block *h_sb)
-{
- int i, new_nelem;
- unsigned long *a, magic;
- static DEFINE_SPINLOCK(spin);
-
- magic = h_sb->s_magic;
- spin_lock(&spin);
- a = au_warn_loopback_array;
- for (i = 0; i < au_warn_loopback_nelem && *a; i++)
- if (a[i] == magic) {
- spin_unlock(&spin);
- return;
- }
-
- /* h_sb is new to us, print it */
- if (i < au_warn_loopback_nelem) {
- a[i] = magic;
- goto pr;
- }
-
- /* expand the array */
- new_nelem = au_warn_loopback_nelem + au_warn_loopback_step;
- a = au_kzrealloc(au_warn_loopback_array,
- au_warn_loopback_nelem * sizeof(unsigned long),
- new_nelem * sizeof(unsigned long), GFP_ATOMIC);
- if (a) {
- au_warn_loopback_nelem = new_nelem;
- au_warn_loopback_array = a;
- a[i] = magic;
- goto pr;
- }
-
- spin_unlock(&spin);
- AuWarn1("realloc failed, ignored\n");
- return;
-
-pr:
- spin_unlock(&spin);
- pr_warn("you may want to try another patch for loopback file "
- "on %s(0x%lx) branch\n", au_sbtype(h_sb), magic);
-}
-
-int au_loopback_init(void)
-{
- int err;
- struct super_block *sb __maybe_unused;
-
- BUILD_BUG_ON(sizeof(sb->s_magic) != sizeof(unsigned long));
-
- err = 0;
- au_warn_loopback_array = kcalloc(au_warn_loopback_step,
- sizeof(unsigned long), GFP_NOFS);
- if (unlikely(!au_warn_loopback_array))
- err = -ENOMEM;
-
- return err;
-}
-
-void au_loopback_fin(void)
-{
- if (backing_file_func)
- symbol_put(loop_backing_file);
- kfree(au_warn_loopback_array);
-}
diff --git a/fs/aufs/loop.h b/fs/aufs/loop.h
deleted file mode 100644
index 48bf070e8..000000000
--- a/fs/aufs/loop.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * support for loopback mount as a branch
- */
-
-#ifndef __AUFS_LOOP_H__
-#define __AUFS_LOOP_H__
-
-#ifdef __KERNEL__
-
-struct dentry;
-struct super_block;
-
-#ifdef CONFIG_AUFS_BDEV_LOOP
-/* drivers/block/loop.c */
-struct file *loop_backing_file(struct super_block *sb);
-
-/* loop.c */
-int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding);
-int au_test_loopback_kthread(void);
-void au_warn_loopback(struct super_block *h_sb);
-
-int au_loopback_init(void);
-void au_loopback_fin(void);
-#else
-AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
- struct dentry *h_adding)
-AuStubInt0(au_test_loopback_kthread, void)
-AuStubVoid(au_warn_loopback, struct super_block *h_sb)
-
-AuStubInt0(au_loopback_init, void)
-AuStubVoid(au_loopback_fin, void)
-#endif /* BLK_DEV_LOOP */
-
-#endif /* __KERNEL__ */
-#endif /* __AUFS_LOOP_H__ */
diff --git a/fs/aufs/magic.mk b/fs/aufs/magic.mk
deleted file mode 100644
index 4f83bdf1d..000000000
--- a/fs/aufs/magic.mk
+++ /dev/null
@@ -1,30 +0,0 @@
-
-# defined in ${srctree}/fs/fuse/inode.c
-# tristate
-ifdef CONFIG_FUSE_FS
-ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546
-endif
-
-# defined in ${srctree}/fs/xfs/xfs_sb.h
-# tristate
-ifdef CONFIG_XFS_FS
-ccflags-y += -DXFS_SB_MAGIC=0x58465342
-endif
-
-# defined in ${srctree}/fs/configfs/mount.c
-# tristate
-ifdef CONFIG_CONFIGFS_FS
-ccflags-y += -DCONFIGFS_MAGIC=0x62656570
-endif
-
-# defined in ${srctree}/fs/ubifs/ubifs.h
-# tristate
-ifdef CONFIG_UBIFS_FS
-ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905
-endif
-
-# defined in ${srctree}/fs/hfsplus/hfsplus_raw.h
-# tristate
-ifdef CONFIG_HFSPLUS_FS
-ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b
-endif
diff --git a/fs/aufs/module.c b/fs/aufs/module.c
deleted file mode 100644
index 8a28377c5..000000000
--- a/fs/aufs/module.c
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * module global variables and operations
- */
-
-#include <linux/module.h>
-#include <linux/seq_file.h>
-#include "aufs.h"
-
-void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp)
-{
- if (new_sz <= nused)
- return p;
-
- p = krealloc(p, new_sz, gfp);
- if (p)
- memset(p + nused, 0, new_sz - nused);
- return p;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * aufs caches
- */
-struct kmem_cache *au_cachep[AuCache_Last];
-static int __init au_cache_init(void)
-{
- au_cachep[AuCache_DINFO] = AuCacheCtor(au_dinfo, au_di_init_once);
- if (au_cachep[AuCache_DINFO])
- /* SLAB_DESTROY_BY_RCU */
- au_cachep[AuCache_ICNTNR] = AuCacheCtor(au_icntnr,
- au_icntnr_init_once);
- if (au_cachep[AuCache_ICNTNR])
- au_cachep[AuCache_FINFO] = AuCacheCtor(au_finfo,
- au_fi_init_once);
- if (au_cachep[AuCache_FINFO])
- au_cachep[AuCache_VDIR] = AuCache(au_vdir);
- if (au_cachep[AuCache_VDIR])
- au_cachep[AuCache_DEHSTR] = AuCache(au_vdir_dehstr);
- if (au_cachep[AuCache_DEHSTR])
- return 0;
-
- return -ENOMEM;
-}
-
-static void au_cache_fin(void)
-{
- int i;
-
- /*
- * Make sure all delayed rcu free inodes are flushed before we
- * destroy cache.
- */
- rcu_barrier();
-
- /* excluding AuCache_HNOTIFY */
- BUILD_BUG_ON(AuCache_HNOTIFY + 1 != AuCache_Last);
- for (i = 0; i < AuCache_HNOTIFY; i++) {
- kmem_cache_destroy(au_cachep[i]);
- au_cachep[i] = NULL;
- }
-}
-
-/* ---------------------------------------------------------------------- */
-
-int au_dir_roflags;
-
-#ifdef CONFIG_AUFS_SBILIST
-/*
- * iterate_supers_type() doesn't protect us from
- * remounting (branch management)
- */
-struct au_splhead au_sbilist;
-#endif
-
-struct lock_class_key au_lc_key[AuLcKey_Last];
-
-/*
- * functions for module interface.
- */
-MODULE_LICENSE("GPL");
-/* MODULE_LICENSE("GPL v2"); */
-MODULE_AUTHOR("Junjiro R. Okajima <aufs-users@lists.sourceforge.net>");
-MODULE_DESCRIPTION(AUFS_NAME
- " -- Advanced multi layered unification filesystem");
-MODULE_VERSION(AUFS_VERSION);
-
-/* this module parameter has no meaning when SYSFS is disabled */
-int sysaufs_brs = 1;
-MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/si_*/brN");
-module_param_named(brs, sysaufs_brs, int, S_IRUGO);
-
-/* this module parameter has no meaning when USER_NS is disabled */
-bool au_userns;
-MODULE_PARM_DESC(allow_userns, "allow unprivileged to mount under userns");
-module_param_named(allow_userns, au_userns, bool, S_IRUGO);
-
-/* ---------------------------------------------------------------------- */
-
-static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NULL */
-
-int au_seq_path(struct seq_file *seq, struct path *path)
-{
- int err;
-
- err = seq_path(seq, path, au_esc_chars);
- if (err > 0)
- err = 0;
- else if (err < 0)
- err = -ENOMEM;
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int __init aufs_init(void)
-{
- int err, i;
- char *p;
-
- p = au_esc_chars;
- for (i = 1; i <= ' '; i++)
- *p++ = i;
- *p++ = '\\';
- *p++ = '\x7f';
- *p = 0;
-
- au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE);
-
- memcpy(aufs_iop_nogetattr, aufs_iop, sizeof(aufs_iop));
- for (i = 0; i < AuIop_Last; i++)
- aufs_iop_nogetattr[i].getattr = NULL;
-
- au_sbilist_init();
- sysaufs_brs_init();
- au_debug_init();
- au_dy_init();
- err = sysaufs_init();
- if (unlikely(err))
- goto out;
- err = au_procfs_init();
- if (unlikely(err))
- goto out_sysaufs;
- err = au_wkq_init();
- if (unlikely(err))
- goto out_procfs;
- err = au_loopback_init();
- if (unlikely(err))
- goto out_wkq;
- err = au_hnotify_init();
- if (unlikely(err))
- goto out_loopback;
- err = au_sysrq_init();
- if (unlikely(err))
- goto out_hin;
- err = au_cache_init();
- if (unlikely(err))
- goto out_sysrq;
-
- aufs_fs_type.fs_flags |= au_userns ? FS_USERNS_MOUNT : 0;
- err = register_filesystem(&aufs_fs_type);
- if (unlikely(err))
- goto out_cache;
-
- /* since we define pr_fmt, call printk directly */
- printk(KERN_INFO AUFS_NAME " " AUFS_VERSION "\n");
- goto out; /* success */
-
-out_cache:
- au_cache_fin();
-out_sysrq:
- au_sysrq_fin();
-out_hin:
- au_hnotify_fin();
-out_loopback:
- au_loopback_fin();
-out_wkq:
- au_wkq_fin();
-out_procfs:
- au_procfs_fin();
-out_sysaufs:
- sysaufs_fin();
- au_dy_fin();
-out:
- return err;
-}
-
-static void __exit aufs_exit(void)
-{
- unregister_filesystem(&aufs_fs_type);
- au_cache_fin();
- au_sysrq_fin();
- au_hnotify_fin();
- au_loopback_fin();
- au_wkq_fin();
- au_procfs_fin();
- sysaufs_fin();
- au_dy_fin();
-}
-
-module_init(aufs_init);
-module_exit(aufs_exit);
diff --git a/fs/aufs/module.h b/fs/aufs/module.h
deleted file mode 100644
index bb8644730..000000000
--- a/fs/aufs/module.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * module initialization and module-global
- */
-
-#ifndef __AUFS_MODULE_H__
-#define __AUFS_MODULE_H__
-
-#ifdef __KERNEL__
-
-#include <linux/slab.h>
-
-struct path;
-struct seq_file;
-
-/* module parameters */
-extern int sysaufs_brs;
-extern bool au_userns;
-
-/* ---------------------------------------------------------------------- */
-
-extern int au_dir_roflags;
-
-enum {
- AuLcNonDir_FIINFO,
- AuLcNonDir_DIINFO,
- AuLcNonDir_IIINFO,
-
- AuLcDir_FIINFO,
- AuLcDir_DIINFO,
- AuLcDir_IIINFO,
-
- AuLcSymlink_DIINFO,
- AuLcSymlink_IIINFO,
-
- AuLcKey_Last
-};
-extern struct lock_class_key au_lc_key[AuLcKey_Last];
-
-void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp);
-int au_seq_path(struct seq_file *seq, struct path *path);
-
-#ifdef CONFIG_PROC_FS
-/* procfs.c */
-int __init au_procfs_init(void);
-void au_procfs_fin(void);
-#else
-AuStubInt0(au_procfs_init, void);
-AuStubVoid(au_procfs_fin, void);
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-/* kmem cache */
-enum {
- AuCache_DINFO,
- AuCache_ICNTNR,
- AuCache_FINFO,
- AuCache_VDIR,
- AuCache_DEHSTR,
- AuCache_HNOTIFY, /* must be last */
- AuCache_Last
-};
-
-#define AuCacheFlags (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD)
-#define AuCache(type) KMEM_CACHE(type, AuCacheFlags)
-#define AuCacheCtor(type, ctor) \
- kmem_cache_create(#type, sizeof(struct type), \
- __alignof__(struct type), AuCacheFlags, ctor)
-
-extern struct kmem_cache *au_cachep[];
-
-#define AuCacheFuncs(name, index) \
-static inline struct au_##name *au_cache_alloc_##name(void) \
-{ return kmem_cache_alloc(au_cachep[AuCache_##index], GFP_NOFS); } \
-static inline void au_cache_free_##name(struct au_##name *p) \
-{ kmem_cache_free(au_cachep[AuCache_##index], p); }
-
-AuCacheFuncs(dinfo, DINFO);
-AuCacheFuncs(icntnr, ICNTNR);
-AuCacheFuncs(finfo, FINFO);
-AuCacheFuncs(vdir, VDIR);
-AuCacheFuncs(vdir_dehstr, DEHSTR);
-#ifdef CONFIG_AUFS_HNOTIFY
-AuCacheFuncs(hnotify, HNOTIFY);
-#endif
-
-#endif /* __KERNEL__ */
-#endif /* __AUFS_MODULE_H__ */
diff --git a/fs/aufs/mvdown.c b/fs/aufs/mvdown.c
deleted file mode 100644
index 1f2224f6d..000000000
--- a/fs/aufs/mvdown.c
+++ /dev/null
@@ -1,690 +0,0 @@
-/*
- * Copyright (C) 2011-2016 Junjiro R. Okajima
- */
-
-/*
- * move-down, opposite of copy-up
- */
-
-#include "aufs.h"
-
-struct au_mvd_args {
- struct {
- struct super_block *h_sb;
- struct dentry *h_parent;
- struct au_hinode *hdir;
- struct inode *h_dir, *h_inode;
- struct au_pin pin;
- } info[AUFS_MVDOWN_NARRAY];
-
- struct aufs_mvdown mvdown;
- struct dentry *dentry, *parent;
- struct inode *inode, *dir;
- struct super_block *sb;
- aufs_bindex_t bopq, bwh, bfound;
- unsigned char rename_lock;
-};
-
-#define mvd_errno mvdown.au_errno
-#define mvd_bsrc mvdown.stbr[AUFS_MVDOWN_UPPER].bindex
-#define mvd_src_brid mvdown.stbr[AUFS_MVDOWN_UPPER].brid
-#define mvd_bdst mvdown.stbr[AUFS_MVDOWN_LOWER].bindex
-#define mvd_dst_brid mvdown.stbr[AUFS_MVDOWN_LOWER].brid
-
-#define mvd_h_src_sb info[AUFS_MVDOWN_UPPER].h_sb
-#define mvd_h_src_parent info[AUFS_MVDOWN_UPPER].h_parent
-#define mvd_hdir_src info[AUFS_MVDOWN_UPPER].hdir
-#define mvd_h_src_dir info[AUFS_MVDOWN_UPPER].h_dir
-#define mvd_h_src_inode info[AUFS_MVDOWN_UPPER].h_inode
-#define mvd_pin_src info[AUFS_MVDOWN_UPPER].pin
-
-#define mvd_h_dst_sb info[AUFS_MVDOWN_LOWER].h_sb
-#define mvd_h_dst_parent info[AUFS_MVDOWN_LOWER].h_parent
-#define mvd_hdir_dst info[AUFS_MVDOWN_LOWER].hdir
-#define mvd_h_dst_dir info[AUFS_MVDOWN_LOWER].h_dir
-#define mvd_h_dst_inode info[AUFS_MVDOWN_LOWER].h_inode
-#define mvd_pin_dst info[AUFS_MVDOWN_LOWER].pin
-
-#define AU_MVD_PR(flag, ...) do { \
- if (flag) \
- pr_err(__VA_ARGS__); \
- } while (0)
-
-static int find_lower_writable(struct au_mvd_args *a)
-{
- struct super_block *sb;
- aufs_bindex_t bindex, bend;
- struct au_branch *br;
-
- sb = a->sb;
- bindex = a->mvd_bsrc;
- bend = au_sbend(sb);
- if (a->mvdown.flags & AUFS_MVDOWN_FHSM_LOWER)
- for (bindex++; bindex <= bend; bindex++) {
- br = au_sbr(sb, bindex);
- if (au_br_fhsm(br->br_perm)
- && (!(au_br_sb(br)->s_flags & MS_RDONLY)))
- return bindex;
- }
- else if (!(a->mvdown.flags & AUFS_MVDOWN_ROLOWER))
- for (bindex++; bindex <= bend; bindex++) {
- br = au_sbr(sb, bindex);
- if (!au_br_rdonly(br))
- return bindex;
- }
- else
- for (bindex++; bindex <= bend; bindex++) {
- br = au_sbr(sb, bindex);
- if (!(au_br_sb(br)->s_flags & MS_RDONLY)) {
- if (au_br_rdonly(br))
- a->mvdown.flags
- |= AUFS_MVDOWN_ROLOWER_R;
- return bindex;
- }
- }
-
- return -1;
-}
-
-/* make the parent dir on bdst */
-static int au_do_mkdir(const unsigned char dmsg, struct au_mvd_args *a)
-{
- int err;
-
- err = 0;
- a->mvd_hdir_src = au_hi(a->dir, a->mvd_bsrc);
- a->mvd_hdir_dst = au_hi(a->dir, a->mvd_bdst);
- a->mvd_h_src_parent = au_h_dptr(a->parent, a->mvd_bsrc);
- a->mvd_h_dst_parent = NULL;
- if (au_dbend(a->parent) >= a->mvd_bdst)
- a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
- if (!a->mvd_h_dst_parent) {
- err = au_cpdown_dirs(a->dentry, a->mvd_bdst);
- if (unlikely(err)) {
- AU_MVD_PR(dmsg, "cpdown_dirs failed\n");
- goto out;
- }
- a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
- }
-
-out:
- AuTraceErr(err);
- return err;
-}
-
-/* lock them all */
-static int au_do_lock(const unsigned char dmsg, struct au_mvd_args *a)
-{
- int err;
- struct dentry *h_trap;
-
- a->mvd_h_src_sb = au_sbr_sb(a->sb, a->mvd_bsrc);
- a->mvd_h_dst_sb = au_sbr_sb(a->sb, a->mvd_bdst);
- err = au_pin(&a->mvd_pin_dst, a->dentry, a->mvd_bdst,
- au_opt_udba(a->sb),
- AuPin_MNT_WRITE | AuPin_DI_LOCKED);
- AuTraceErr(err);
- if (unlikely(err)) {
- AU_MVD_PR(dmsg, "pin_dst failed\n");
- goto out;
- }
-
- if (a->mvd_h_src_sb != a->mvd_h_dst_sb) {
- a->rename_lock = 0;
- au_pin_init(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
- AuLsc_DI_PARENT, AuLsc_I_PARENT3,
- au_opt_udba(a->sb),
- AuPin_MNT_WRITE | AuPin_DI_LOCKED);
- err = au_do_pin(&a->mvd_pin_src);
- AuTraceErr(err);
- a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
- if (unlikely(err)) {
- AU_MVD_PR(dmsg, "pin_src failed\n");
- goto out_dst;
- }
- goto out; /* success */
- }
-
- a->rename_lock = 1;
- au_pin_hdir_unlock(&a->mvd_pin_dst);
- err = au_pin(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
- au_opt_udba(a->sb),
- AuPin_MNT_WRITE | AuPin_DI_LOCKED);
- AuTraceErr(err);
- a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
- if (unlikely(err)) {
- AU_MVD_PR(dmsg, "pin_src failed\n");
- au_pin_hdir_lock(&a->mvd_pin_dst);
- goto out_dst;
- }
- au_pin_hdir_unlock(&a->mvd_pin_src);
- h_trap = vfsub_lock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
- a->mvd_h_dst_parent, a->mvd_hdir_dst);
- if (h_trap) {
- err = (h_trap != a->mvd_h_src_parent);
- if (err)
- err = (h_trap != a->mvd_h_dst_parent);
- }
- BUG_ON(err); /* it should never happen */
- if (unlikely(a->mvd_h_src_dir != au_pinned_h_dir(&a->mvd_pin_src))) {
- err = -EBUSY;
- AuTraceErr(err);
- vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
- a->mvd_h_dst_parent, a->mvd_hdir_dst);
- au_pin_hdir_lock(&a->mvd_pin_src);
- au_unpin(&a->mvd_pin_src);
- au_pin_hdir_lock(&a->mvd_pin_dst);
- goto out_dst;
- }
- goto out; /* success */
-
-out_dst:
- au_unpin(&a->mvd_pin_dst);
-out:
- AuTraceErr(err);
- return err;
-}
-
-static void au_do_unlock(const unsigned char dmsg, struct au_mvd_args *a)
-{
- if (!a->rename_lock)
- au_unpin(&a->mvd_pin_src);
- else {
- vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
- a->mvd_h_dst_parent, a->mvd_hdir_dst);
- au_pin_hdir_lock(&a->mvd_pin_src);
- au_unpin(&a->mvd_pin_src);
- au_pin_hdir_lock(&a->mvd_pin_dst);
- }
- au_unpin(&a->mvd_pin_dst);
-}
-
-/* copy-down the file */
-static int au_do_cpdown(const unsigned char dmsg, struct au_mvd_args *a)
-{
- int err;
- struct au_cp_generic cpg = {
- .dentry = a->dentry,
- .bdst = a->mvd_bdst,
- .bsrc = a->mvd_bsrc,
- .len = -1,
- .pin = &a->mvd_pin_dst,
- .flags = AuCpup_DTIME | AuCpup_HOPEN
- };
-
- AuDbg("b%d, b%d\n", cpg.bsrc, cpg.bdst);
- if (a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
- au_fset_cpup(cpg.flags, OVERWRITE);
- if (a->mvdown.flags & AUFS_MVDOWN_ROLOWER)
- au_fset_cpup(cpg.flags, RWDST);
- err = au_sio_cpdown_simple(&cpg);
- if (unlikely(err))
- AU_MVD_PR(dmsg, "cpdown failed\n");
-
- AuTraceErr(err);
- return err;
-}
-
-/*
- * unlink the whiteout on bdst if exist which may be created by UDBA while we
- * were sleeping
- */
-static int au_do_unlink_wh(const unsigned char dmsg, struct au_mvd_args *a)
-{
- int err;
- struct path h_path;
- struct au_branch *br;
- struct inode *delegated;
-
- br = au_sbr(a->sb, a->mvd_bdst);
- h_path.dentry = au_wh_lkup(a->mvd_h_dst_parent, &a->dentry->d_name, br);
- err = PTR_ERR(h_path.dentry);
- if (IS_ERR(h_path.dentry)) {
- AU_MVD_PR(dmsg, "wh_lkup failed\n");
- goto out;
- }
-
- err = 0;
- if (d_is_positive(h_path.dentry)) {
- h_path.mnt = au_br_mnt(br);
- delegated = NULL;
- err = vfsub_unlink(d_inode(a->mvd_h_dst_parent), &h_path,
- &delegated, /*force*/0);
- if (unlikely(err == -EWOULDBLOCK)) {
- pr_warn("cannot retry for NFSv4 delegation"
- " for an internal unlink\n");
- iput(delegated);
- }
- if (unlikely(err))
- AU_MVD_PR(dmsg, "wh_unlink failed\n");
- }
- dput(h_path.dentry);
-
-out:
- AuTraceErr(err);
- return err;
-}
-
-/*
- * unlink the topmost h_dentry
- */
-static int au_do_unlink(const unsigned char dmsg, struct au_mvd_args *a)
-{
- int err;
- struct path h_path;
- struct inode *delegated;
-
- h_path.mnt = au_sbr_mnt(a->sb, a->mvd_bsrc);
- h_path.dentry = au_h_dptr(a->dentry, a->mvd_bsrc);
- delegated = NULL;
- err = vfsub_unlink(a->mvd_h_src_dir, &h_path, &delegated, /*force*/0);
- if (unlikely(err == -EWOULDBLOCK)) {
- pr_warn("cannot retry for NFSv4 delegation"
- " for an internal unlink\n");
- iput(delegated);
- }
- if (unlikely(err))
- AU_MVD_PR(dmsg, "unlink failed\n");
-
- AuTraceErr(err);
- return err;
-}
-
-/* Since mvdown succeeded, we ignore an error of this function */
-static void au_do_stfs(const unsigned char dmsg, struct au_mvd_args *a)
-{
- int err;
- struct au_branch *br;
-
- a->mvdown.flags |= AUFS_MVDOWN_STFS_FAILED;
- br = au_sbr(a->sb, a->mvd_bsrc);
- err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_UPPER].stfs);
- if (!err) {
- br = au_sbr(a->sb, a->mvd_bdst);
- a->mvdown.stbr[AUFS_MVDOWN_LOWER].brid = br->br_id;
- err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_LOWER].stfs);
- }
- if (!err)
- a->mvdown.flags &= ~AUFS_MVDOWN_STFS_FAILED;
- else
- AU_MVD_PR(dmsg, "statfs failed (%d), ignored\n", err);
-}
-
-/*
- * copy-down the file and unlink the bsrc file.
- * - unlink the bdst whout if exist
- * - copy-down the file (with whtmp name and rename)
- * - unlink the bsrc file
- */
-static int au_do_mvdown(const unsigned char dmsg, struct au_mvd_args *a)
-{
- int err;
-
- err = au_do_mkdir(dmsg, a);
- if (!err)
- err = au_do_lock(dmsg, a);
- if (unlikely(err))
- goto out;
-
- /*
- * do not revert the activities we made on bdst since they should be
- * harmless in aufs.
- */
-
- err = au_do_cpdown(dmsg, a);
- if (!err)
- err = au_do_unlink_wh(dmsg, a);
- if (!err && !(a->mvdown.flags & AUFS_MVDOWN_KUPPER))
- err = au_do_unlink(dmsg, a);
- if (unlikely(err))
- goto out_unlock;
-
- AuDbg("%pd2, 0x%x, %d --> %d\n",
- a->dentry, a->mvdown.flags, a->mvd_bsrc, a->mvd_bdst);
- if (find_lower_writable(a) < 0)
- a->mvdown.flags |= AUFS_MVDOWN_BOTTOM;
-
- if (a->mvdown.flags & AUFS_MVDOWN_STFS)
- au_do_stfs(dmsg, a);
-
- /* maintain internal array */
- if (!(a->mvdown.flags & AUFS_MVDOWN_KUPPER)) {
- au_set_h_dptr(a->dentry, a->mvd_bsrc, NULL);
- au_set_dbstart(a->dentry, a->mvd_bdst);
- au_set_h_iptr(a->inode, a->mvd_bsrc, NULL, /*flags*/0);
- au_set_ibstart(a->inode, a->mvd_bdst);
- } else {
- /* hide the lower */
- au_set_h_dptr(a->dentry, a->mvd_bdst, NULL);
- au_set_dbend(a->dentry, a->mvd_bsrc);
- au_set_h_iptr(a->inode, a->mvd_bdst, NULL, /*flags*/0);
- au_set_ibend(a->inode, a->mvd_bsrc);
- }
- if (au_dbend(a->dentry) < a->mvd_bdst)
- au_set_dbend(a->dentry, a->mvd_bdst);
- if (au_ibend(a->inode) < a->mvd_bdst)
- au_set_ibend(a->inode, a->mvd_bdst);
-
-out_unlock:
- au_do_unlock(dmsg, a);
-out:
- AuTraceErr(err);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* make sure the file is idle */
-static int au_mvd_args_busy(const unsigned char dmsg, struct au_mvd_args *a)
-{
- int err, plinked;
-
- err = 0;
- plinked = !!au_opt_test(au_mntflags(a->sb), PLINK);
- if (au_dbstart(a->dentry) == a->mvd_bsrc
- && au_dcount(a->dentry) == 1
- && atomic_read(&a->inode->i_count) == 1
- /* && a->mvd_h_src_inode->i_nlink == 1 */
- && (!plinked || !au_plink_test(a->inode))
- && a->inode->i_nlink == 1)
- goto out;
-
- err = -EBUSY;
- AU_MVD_PR(dmsg,
- "b%d, d{b%d, c%d?}, i{c%d?, l%u}, hi{l%u}, p{%d, %d}\n",
- a->mvd_bsrc, au_dbstart(a->dentry), au_dcount(a->dentry),
- atomic_read(&a->inode->i_count), a->inode->i_nlink,
- a->mvd_h_src_inode->i_nlink,
- plinked, plinked ? au_plink_test(a->inode) : 0);
-
-out:
- AuTraceErr(err);
- return err;
-}
-
-/* make sure the parent dir is fine */
-static int au_mvd_args_parent(const unsigned char dmsg,
- struct au_mvd_args *a)
-{
- int err;
- aufs_bindex_t bindex;
-
- err = 0;
- if (unlikely(au_alive_dir(a->parent))) {
- err = -ENOENT;
- AU_MVD_PR(dmsg, "parent dir is dead\n");
- goto out;
- }
-
- a->bopq = au_dbdiropq(a->parent);
- bindex = au_wbr_nonopq(a->dentry, a->mvd_bdst);
- AuDbg("b%d\n", bindex);
- if (unlikely((bindex >= 0 && bindex < a->mvd_bdst)
- || (a->bopq != -1 && a->bopq < a->mvd_bdst))) {
- err = -EINVAL;
- a->mvd_errno = EAU_MVDOWN_OPAQUE;
- AU_MVD_PR(dmsg, "ancestor is opaque b%d, b%d\n",
- a->bopq, a->mvd_bdst);
- }
-
-out:
- AuTraceErr(err);
- return err;
-}
-
-static int au_mvd_args_intermediate(const unsigned char dmsg,
- struct au_mvd_args *a)
-{
- int err;
- struct au_dinfo *dinfo, *tmp;
-
- /* lookup the next lower positive entry */
- err = -ENOMEM;
- tmp = au_di_alloc(a->sb, AuLsc_DI_TMP);
- if (unlikely(!tmp))
- goto out;
-
- a->bfound = -1;
- a->bwh = -1;
- dinfo = au_di(a->dentry);
- au_di_cp(tmp, dinfo);
- au_di_swap(tmp, dinfo);
-
- /* returns the number of positive dentries */
- err = au_lkup_dentry(a->dentry, a->mvd_bsrc + 1, /*type*/0);
- if (!err)
- a->bwh = au_dbwh(a->dentry);
- else if (err > 0)
- a->bfound = au_dbstart(a->dentry);
-
- au_di_swap(tmp, dinfo);
- au_rw_write_unlock(&tmp->di_rwsem);
- au_di_free(tmp);
- if (unlikely(err < 0))
- AU_MVD_PR(dmsg, "failed look-up lower\n");
-
- /*
- * here, we have these cases.
- * bfound == -1
- * no positive dentry under bsrc. there are more sub-cases.
- * bwh < 0
- * there no whiteout, we can safely move-down.
- * bwh <= bsrc
- * impossible
- * bsrc < bwh && bwh < bdst
- * there is a whiteout on RO branch. cannot proceed.
- * bwh == bdst
- * there is a whiteout on the RW target branch. it should
- * be removed.
- * bdst < bwh
- * there is a whiteout somewhere unrelated branch.
- * -1 < bfound && bfound <= bsrc
- * impossible.
- * bfound < bdst
- * found, but it is on RO branch between bsrc and bdst. cannot
- * proceed.
- * bfound == bdst
- * found, replace it if AUFS_MVDOWN_FORCE is set. otherwise return
- * error.
- * bdst < bfound
- * found, after we create the file on bdst, it will be hidden.
- */
-
- AuDebugOn(a->bfound == -1
- && a->bwh != -1
- && a->bwh <= a->mvd_bsrc);
- AuDebugOn(-1 < a->bfound
- && a->bfound <= a->mvd_bsrc);
-
- err = -EINVAL;
- if (a->bfound == -1
- && a->mvd_bsrc < a->bwh
- && a->bwh != -1
- && a->bwh < a->mvd_bdst) {
- a->mvd_errno = EAU_MVDOWN_WHITEOUT;
- AU_MVD_PR(dmsg, "bsrc %d, bdst %d, bfound %d, bwh %d\n",
- a->mvd_bsrc, a->mvd_bdst, a->bfound, a->bwh);
- goto out;
- } else if (a->bfound != -1 && a->bfound < a->mvd_bdst) {
- a->mvd_errno = EAU_MVDOWN_UPPER;
- AU_MVD_PR(dmsg, "bdst %d, bfound %d\n",
- a->mvd_bdst, a->bfound);
- goto out;
- }
-
- err = 0; /* success */
-
-out:
- AuTraceErr(err);
- return err;
-}
-
-static int au_mvd_args_exist(const unsigned char dmsg, struct au_mvd_args *a)
-{
- int err;
-
- err = 0;
- if (!(a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
- && a->bfound == a->mvd_bdst)
- err = -EEXIST;
- AuTraceErr(err);
- return err;
-}
-
-static int au_mvd_args(const unsigned char dmsg, struct au_mvd_args *a)
-{
- int err;
- struct au_branch *br;
-
- err = -EISDIR;
- if (unlikely(S_ISDIR(a->inode->i_mode)))
- goto out;
-
- err = -EINVAL;
- if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_UPPER))
- a->mvd_bsrc = au_ibstart(a->inode);
- else {
- a->mvd_bsrc = au_br_index(a->sb, a->mvd_src_brid);
- if (unlikely(a->mvd_bsrc < 0
- || (a->mvd_bsrc < au_dbstart(a->dentry)
- || au_dbend(a->dentry) < a->mvd_bsrc
- || !au_h_dptr(a->dentry, a->mvd_bsrc))
- || (a->mvd_bsrc < au_ibstart(a->inode)
- || au_ibend(a->inode) < a->mvd_bsrc
- || !au_h_iptr(a->inode, a->mvd_bsrc)))) {
- a->mvd_errno = EAU_MVDOWN_NOUPPER;
- AU_MVD_PR(dmsg, "no upper\n");
- goto out;
- }
- }
- if (unlikely(a->mvd_bsrc == au_sbend(a->sb))) {
- a->mvd_errno = EAU_MVDOWN_BOTTOM;
- AU_MVD_PR(dmsg, "on the bottom\n");
- goto out;
- }
- a->mvd_h_src_inode = au_h_iptr(a->inode, a->mvd_bsrc);
- br = au_sbr(a->sb, a->mvd_bsrc);
- err = au_br_rdonly(br);
- if (!(a->mvdown.flags & AUFS_MVDOWN_ROUPPER)) {
- if (unlikely(err))
- goto out;
- } else if (!(vfsub_native_ro(a->mvd_h_src_inode)
- || IS_APPEND(a->mvd_h_src_inode))) {
- if (err)
- a->mvdown.flags |= AUFS_MVDOWN_ROUPPER_R;
- /* go on */
- } else
- goto out;
-
- err = -EINVAL;
- if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_LOWER)) {
- a->mvd_bdst = find_lower_writable(a);
- if (unlikely(a->mvd_bdst < 0)) {
- a->mvd_errno = EAU_MVDOWN_BOTTOM;
- AU_MVD_PR(dmsg, "no writable lower branch\n");
- goto out;
- }
- } else {
- a->mvd_bdst = au_br_index(a->sb, a->mvd_dst_brid);
- if (unlikely(a->mvd_bdst < 0
- || au_sbend(a->sb) < a->mvd_bdst)) {
- a->mvd_errno = EAU_MVDOWN_NOLOWERBR;
- AU_MVD_PR(dmsg, "no lower brid\n");
- goto out;
- }
- }
-
- err = au_mvd_args_busy(dmsg, a);
- if (!err)
- err = au_mvd_args_parent(dmsg, a);
- if (!err)
- err = au_mvd_args_intermediate(dmsg, a);
- if (!err)
- err = au_mvd_args_exist(dmsg, a);
- if (!err)
- AuDbg("b%d, b%d\n", a->mvd_bsrc, a->mvd_bdst);
-
-out:
- AuTraceErr(err);
- return err;
-}
-
-int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *uarg)
-{
- int err, e;
- unsigned char dmsg;
- struct au_mvd_args *args;
- struct inode *inode;
-
- inode = d_inode(dentry);
- err = -EPERM;
- if (unlikely(!capable(CAP_SYS_ADMIN)))
- goto out;
-
- err = -ENOMEM;
- args = kmalloc(sizeof(*args), GFP_NOFS);
- if (unlikely(!args))
- goto out;
-
- err = copy_from_user(&args->mvdown, uarg, sizeof(args->mvdown));
- if (!err)
- err = !access_ok(VERIFY_WRITE, uarg, sizeof(*uarg));
- if (unlikely(err)) {
- err = -EFAULT;
- AuTraceErr(err);
- goto out_free;
- }
- AuDbg("flags 0x%x\n", args->mvdown.flags);
- args->mvdown.flags &= ~(AUFS_MVDOWN_ROLOWER_R | AUFS_MVDOWN_ROUPPER_R);
- args->mvdown.au_errno = 0;
- args->dentry = dentry;
- args->inode = inode;
- args->sb = dentry->d_sb;
-
- err = -ENOENT;
- dmsg = !!(args->mvdown.flags & AUFS_MVDOWN_DMSG);
- args->parent = dget_parent(dentry);
- args->dir = d_inode(args->parent);
- mutex_lock_nested(&args->dir->i_mutex, I_MUTEX_PARENT);
- dput(args->parent);
- if (unlikely(args->parent != dentry->d_parent)) {
- AU_MVD_PR(dmsg, "parent dir is moved\n");
- goto out_dir;
- }
-
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
- err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_NOPLMW);
- if (unlikely(err))
- goto out_inode;
-
- di_write_lock_parent(args->parent);
- err = au_mvd_args(dmsg, args);
- if (unlikely(err))
- goto out_parent;
-
- err = au_do_mvdown(dmsg, args);
- if (unlikely(err))
- goto out_parent;
-
- au_cpup_attr_timesizes(args->dir);
- au_cpup_attr_timesizes(inode);
- if (!(args->mvdown.flags & AUFS_MVDOWN_KUPPER))
- au_cpup_igen(inode, au_h_iptr(inode, args->mvd_bdst));
- /* au_digen_dec(dentry); */
-
-out_parent:
- di_write_unlock(args->parent);
- aufs_read_unlock(dentry, AuLock_DW);
-out_inode:
- mutex_unlock(&inode->i_mutex);
-out_dir:
- mutex_unlock(&args->dir->i_mutex);
-out_free:
- e = copy_to_user(uarg, &args->mvdown, sizeof(args->mvdown));
- if (unlikely(e))
- err = -EFAULT;
- kfree(args);
-out:
- AuTraceErr(err);
- return err;
-}
diff --git a/fs/aufs/opts.c b/fs/aufs/opts.c
deleted file mode 100644
index 5c39817f3..000000000
--- a/fs/aufs/opts.c
+++ /dev/null
@@ -1,1846 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * mount options/flags
- */
-
-#include <linux/namei.h>
-#include <linux/types.h> /* a distribution requires */
-#include <linux/parser.h>
-#include "aufs.h"
-
-/* ---------------------------------------------------------------------- */
-
-enum {
- Opt_br,
- Opt_add, Opt_del, Opt_mod, Opt_append, Opt_prepend,
- Opt_idel, Opt_imod,
- Opt_dirwh, Opt_rdcache, Opt_rdblk, Opt_rdhash,
- Opt_rdblk_def, Opt_rdhash_def,
- Opt_xino, Opt_noxino,
- Opt_trunc_xino, Opt_trunc_xino_v, Opt_notrunc_xino,
- Opt_trunc_xino_path, Opt_itrunc_xino,
- Opt_trunc_xib, Opt_notrunc_xib,
- Opt_shwh, Opt_noshwh,
- Opt_plink, Opt_noplink, Opt_list_plink,
- Opt_udba,
- Opt_dio, Opt_nodio,
- Opt_diropq_a, Opt_diropq_w,
- Opt_warn_perm, Opt_nowarn_perm,
- Opt_wbr_copyup, Opt_wbr_create,
- Opt_fhsm_sec,
- Opt_verbose, Opt_noverbose,
- Opt_sum, Opt_nosum, Opt_wsum,
- Opt_dirperm1, Opt_nodirperm1,
- Opt_acl, Opt_noacl,
- Opt_tail, Opt_ignore, Opt_ignore_silent, Opt_err
-};
-
-static match_table_t options = {
- {Opt_br, "br=%s"},
- {Opt_br, "br:%s"},
-
- {Opt_add, "add=%d:%s"},
- {Opt_add, "add:%d:%s"},
- {Opt_add, "ins=%d:%s"},
- {Opt_add, "ins:%d:%s"},
- {Opt_append, "append=%s"},
- {Opt_append, "append:%s"},
- {Opt_prepend, "prepend=%s"},
- {Opt_prepend, "prepend:%s"},
-
- {Opt_del, "del=%s"},
- {Opt_del, "del:%s"},
- /* {Opt_idel, "idel:%d"}, */
- {Opt_mod, "mod=%s"},
- {Opt_mod, "mod:%s"},
- /* {Opt_imod, "imod:%d:%s"}, */
-
- {Opt_dirwh, "dirwh=%d"},
-
- {Opt_xino, "xino=%s"},
- {Opt_noxino, "noxino"},
- {Opt_trunc_xino, "trunc_xino"},
- {Opt_trunc_xino_v, "trunc_xino_v=%d:%d"},
- {Opt_notrunc_xino, "notrunc_xino"},
- {Opt_trunc_xino_path, "trunc_xino=%s"},
- {Opt_itrunc_xino, "itrunc_xino=%d"},
- /* {Opt_zxino, "zxino=%s"}, */
- {Opt_trunc_xib, "trunc_xib"},
- {Opt_notrunc_xib, "notrunc_xib"},
-
-#ifdef CONFIG_PROC_FS
- {Opt_plink, "plink"},
-#else
- {Opt_ignore_silent, "plink"},
-#endif
-
- {Opt_noplink, "noplink"},
-
-#ifdef CONFIG_AUFS_DEBUG
- {Opt_list_plink, "list_plink"},
-#endif
-
- {Opt_udba, "udba=%s"},
-
- {Opt_dio, "dio"},
- {Opt_nodio, "nodio"},
-
-#ifdef CONFIG_AUFS_FHSM
- {Opt_fhsm_sec, "fhsm_sec=%d"},
-#else
- {Opt_ignore_silent, "fhsm_sec=%d"},
-#endif
-
- {Opt_diropq_a, "diropq=always"},
- {Opt_diropq_a, "diropq=a"},
- {Opt_diropq_w, "diropq=whiteouted"},
- {Opt_diropq_w, "diropq=w"},
-
- {Opt_warn_perm, "warn_perm"},
- {Opt_nowarn_perm, "nowarn_perm"},
-
- /* keep them temporary */
- {Opt_ignore_silent, "nodlgt"},
- {Opt_ignore_silent, "clean_plink"},
-
-#ifdef CONFIG_AUFS_SHWH
- {Opt_shwh, "shwh"},
-#endif
- {Opt_noshwh, "noshwh"},
-
- {Opt_dirperm1, "dirperm1"},
- {Opt_nodirperm1, "nodirperm1"},
-
- {Opt_verbose, "verbose"},
- {Opt_verbose, "v"},
- {Opt_noverbose, "noverbose"},
- {Opt_noverbose, "quiet"},
- {Opt_noverbose, "q"},
- {Opt_noverbose, "silent"},
-
- {Opt_sum, "sum"},
- {Opt_nosum, "nosum"},
- {Opt_wsum, "wsum"},
-
- {Opt_rdcache, "rdcache=%d"},
- {Opt_rdblk, "rdblk=%d"},
- {Opt_rdblk_def, "rdblk=def"},
- {Opt_rdhash, "rdhash=%d"},
- {Opt_rdhash_def, "rdhash=def"},
-
- {Opt_wbr_create, "create=%s"},
- {Opt_wbr_create, "create_policy=%s"},
- {Opt_wbr_copyup, "cpup=%s"},
- {Opt_wbr_copyup, "copyup=%s"},
- {Opt_wbr_copyup, "copyup_policy=%s"},
-
- /* generic VFS flag */
-#ifdef CONFIG_FS_POSIX_ACL
- {Opt_acl, "acl"},
- {Opt_noacl, "noacl"},
-#else
- {Opt_ignore_silent, "acl"},
- {Opt_ignore_silent, "noacl"},
-#endif
-
- /* internal use for the scripts */
- {Opt_ignore_silent, "si=%s"},
-
- {Opt_br, "dirs=%s"},
- {Opt_ignore, "debug=%d"},
- {Opt_ignore, "delete=whiteout"},
- {Opt_ignore, "delete=all"},
- {Opt_ignore, "imap=%s"},
-
- /* temporary workaround, due to old mount(8)? */
- {Opt_ignore_silent, "relatime"},
-
- {Opt_err, NULL}
-};
-
-/* ---------------------------------------------------------------------- */
-
-static const char *au_parser_pattern(int val, match_table_t tbl)
-{
- struct match_token *p;
-
- p = tbl;
- while (p->pattern) {
- if (p->token == val)
- return p->pattern;
- p++;
- }
- BUG();
- return "??";
-}
-
-static const char *au_optstr(int *val, match_table_t tbl)
-{
- struct match_token *p;
- int v;
-
- v = *val;
- if (!v)
- goto out;
- p = tbl;
- while (p->pattern) {
- if (p->token
- && (v & p->token) == p->token) {
- *val &= ~p->token;
- return p->pattern;
- }
- p++;
- }
-
-out:
- return NULL;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static match_table_t brperm = {
- {AuBrPerm_RO, AUFS_BRPERM_RO},
- {AuBrPerm_RR, AUFS_BRPERM_RR},
- {AuBrPerm_RW, AUFS_BRPERM_RW},
- {0, NULL}
-};
-
-static match_table_t brattr = {
- /* general */
- {AuBrAttr_COO_REG, AUFS_BRATTR_COO_REG},
- {AuBrAttr_COO_ALL, AUFS_BRATTR_COO_ALL},
- /* 'unpin' attrib is meaningless since linux-3.18-rc1 */
- {AuBrAttr_UNPIN, AUFS_BRATTR_UNPIN},
-#ifdef CONFIG_AUFS_FHSM
- {AuBrAttr_FHSM, AUFS_BRATTR_FHSM},
-#endif
-#ifdef CONFIG_AUFS_XATTR
- {AuBrAttr_ICEX, AUFS_BRATTR_ICEX},
- {AuBrAttr_ICEX_SEC, AUFS_BRATTR_ICEX_SEC},
- {AuBrAttr_ICEX_SYS, AUFS_BRATTR_ICEX_SYS},
- {AuBrAttr_ICEX_TR, AUFS_BRATTR_ICEX_TR},
- {AuBrAttr_ICEX_USR, AUFS_BRATTR_ICEX_USR},
- {AuBrAttr_ICEX_OTH, AUFS_BRATTR_ICEX_OTH},
-#endif
-
- /* ro/rr branch */
- {AuBrRAttr_WH, AUFS_BRRATTR_WH},
-
- /* rw branch */
- {AuBrWAttr_MOO, AUFS_BRWATTR_MOO},
- {AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH},
-
- {0, NULL}
-};
-
-static int br_attr_val(char *str, match_table_t table, substring_t args[])
-{
- int attr, v;
- char *p;
-
- attr = 0;
- do {
- p = strchr(str, '+');
- if (p)
- *p = 0;
- v = match_token(str, table, args);
- if (v) {
- if (v & AuBrAttr_CMOO_Mask)
- attr &= ~AuBrAttr_CMOO_Mask;
- attr |= v;
- } else {
- if (p)
- *p = '+';
- pr_warn("ignored branch attribute %s\n", str);
- break;
- }
- if (p)
- str = p + 1;
- } while (p);
-
- return attr;
-}
-
-static int au_do_optstr_br_attr(au_br_perm_str_t *str, int perm)
-{
- int sz;
- const char *p;
- char *q;
-
- q = str->a;
- *q = 0;
- p = au_optstr(&perm, brattr);
- if (p) {
- sz = strlen(p);
- memcpy(q, p, sz + 1);
- q += sz;
- } else
- goto out;
-
- do {
- p = au_optstr(&perm, brattr);
- if (p) {
- *q++ = '+';
- sz = strlen(p);
- memcpy(q, p, sz + 1);
- q += sz;
- }
- } while (p);
-
-out:
- return q - str->a;
-}
-
-static int noinline_for_stack br_perm_val(char *perm)
-{
- int val, bad, sz;
- char *p;
- substring_t args[MAX_OPT_ARGS];
- au_br_perm_str_t attr;
-
- p = strchr(perm, '+');
- if (p)
- *p = 0;
- val = match_token(perm, brperm, args);
- if (!val) {
- if (p)
- *p = '+';
- pr_warn("ignored branch permission %s\n", perm);
- val = AuBrPerm_RO;
- goto out;
- }
- if (!p)
- goto out;
-
- val |= br_attr_val(p + 1, brattr, args);
-
- bad = 0;
- switch (val & AuBrPerm_Mask) {
- case AuBrPerm_RO:
- case AuBrPerm_RR:
- bad = val & AuBrWAttr_Mask;
- val &= ~AuBrWAttr_Mask;
- break;
- case AuBrPerm_RW:
- bad = val & AuBrRAttr_Mask;
- val &= ~AuBrRAttr_Mask;
- break;
- }
-
- /*
- * 'unpin' attrib becomes meaningless since linux-3.18-rc1, but aufs
- * does not treat it as an error, just warning.
- * this is a tiny guard for the user operation.
- */
- if (val & AuBrAttr_UNPIN) {
- bad |= AuBrAttr_UNPIN;
- val &= ~AuBrAttr_UNPIN;
- }
-
- if (unlikely(bad)) {
- sz = au_do_optstr_br_attr(&attr, bad);
- AuDebugOn(!sz);
- pr_warn("ignored branch attribute %s\n", attr.a);
- }
-
-out:
- return val;
-}
-
-void au_optstr_br_perm(au_br_perm_str_t *str, int perm)
-{
- au_br_perm_str_t attr;
- const char *p;
- char *q;
- int sz;
-
- q = str->a;
- p = au_optstr(&perm, brperm);
- AuDebugOn(!p || !*p);
- sz = strlen(p);
- memcpy(q, p, sz + 1);
- q += sz;
-
- sz = au_do_optstr_br_attr(&attr, perm);
- if (sz) {
- *q++ = '+';
- memcpy(q, attr.a, sz + 1);
- }
-
- AuDebugOn(strlen(str->a) >= sizeof(str->a));
-}
-
-/* ---------------------------------------------------------------------- */
-
-static match_table_t udbalevel = {
- {AuOpt_UDBA_REVAL, "reval"},
- {AuOpt_UDBA_NONE, "none"},
-#ifdef CONFIG_AUFS_HNOTIFY
- {AuOpt_UDBA_HNOTIFY, "notify"}, /* abstraction */
-#ifdef CONFIG_AUFS_HFSNOTIFY
- {AuOpt_UDBA_HNOTIFY, "fsnotify"},
-#endif
-#endif
- {-1, NULL}
-};
-
-static int noinline_for_stack udba_val(char *str)
-{
- substring_t args[MAX_OPT_ARGS];
-
- return match_token(str, udbalevel, args);
-}
-
-const char *au_optstr_udba(int udba)
-{
- return au_parser_pattern(udba, udbalevel);
-}
-
-/* ---------------------------------------------------------------------- */
-
-static match_table_t au_wbr_create_policy = {
- {AuWbrCreate_TDP, "tdp"},
- {AuWbrCreate_TDP, "top-down-parent"},
- {AuWbrCreate_RR, "rr"},
- {AuWbrCreate_RR, "round-robin"},
- {AuWbrCreate_MFS, "mfs"},
- {AuWbrCreate_MFS, "most-free-space"},
- {AuWbrCreate_MFSV, "mfs:%d"},
- {AuWbrCreate_MFSV, "most-free-space:%d"},
-
- {AuWbrCreate_MFSRR, "mfsrr:%d"},
- {AuWbrCreate_MFSRRV, "mfsrr:%d:%d"},
- {AuWbrCreate_PMFS, "pmfs"},
- {AuWbrCreate_PMFSV, "pmfs:%d"},
- {AuWbrCreate_PMFSRR, "pmfsrr:%d"},
- {AuWbrCreate_PMFSRRV, "pmfsrr:%d:%d"},
-
- {-1, NULL}
-};
-
-/*
- * cf. linux/lib/parser.c and cmdline.c
- * gave up calling memparse() since it uses simple_strtoull() instead of
- * kstrto...().
- */
-static int noinline_for_stack
-au_match_ull(substring_t *s, unsigned long long *result)
-{
- int err;
- unsigned int len;
- char a[32];
-
- err = -ERANGE;
- len = s->to - s->from;
- if (len + 1 <= sizeof(a)) {
- memcpy(a, s->from, len);
- a[len] = '\0';
- err = kstrtoull(a, 0, result);
- }
- return err;
-}
-
-static int au_wbr_mfs_wmark(substring_t *arg, char *str,
- struct au_opt_wbr_create *create)
-{
- int err;
- unsigned long long ull;
-
- err = 0;
- if (!au_match_ull(arg, &ull))
- create->mfsrr_watermark = ull;
- else {
- pr_err("bad integer in %s\n", str);
- err = -EINVAL;
- }
-
- return err;
-}
-
-static int au_wbr_mfs_sec(substring_t *arg, char *str,
- struct au_opt_wbr_create *create)
-{
- int n, err;
-
- err = 0;
- if (!match_int(arg, &n) && 0 <= n && n <= AUFS_MFS_MAX_SEC)
- create->mfs_second = n;
- else {
- pr_err("bad integer in %s\n", str);
- err = -EINVAL;
- }
-
- return err;
-}
-
-static int noinline_for_stack
-au_wbr_create_val(char *str, struct au_opt_wbr_create *create)
-{
- int err, e;
- substring_t args[MAX_OPT_ARGS];
-
- err = match_token(str, au_wbr_create_policy, args);
- create->wbr_create = err;
- switch (err) {
- case AuWbrCreate_MFSRRV:
- case AuWbrCreate_PMFSRRV:
- e = au_wbr_mfs_wmark(&args[0], str, create);
- if (!e)
- e = au_wbr_mfs_sec(&args[1], str, create);
- if (unlikely(e))
- err = e;
- break;
- case AuWbrCreate_MFSRR:
- case AuWbrCreate_PMFSRR:
- e = au_wbr_mfs_wmark(&args[0], str, create);
- if (unlikely(e)) {
- err = e;
- break;
- }
- /*FALLTHROUGH*/
- case AuWbrCreate_MFS:
- case AuWbrCreate_PMFS:
- create->mfs_second = AUFS_MFS_DEF_SEC;
- break;
- case AuWbrCreate_MFSV:
- case AuWbrCreate_PMFSV:
- e = au_wbr_mfs_sec(&args[0], str, create);
- if (unlikely(e))
- err = e;
- break;
- }
-
- return err;
-}
-
-const char *au_optstr_wbr_create(int wbr_create)
-{
- return au_parser_pattern(wbr_create, au_wbr_create_policy);
-}
-
-static match_table_t au_wbr_copyup_policy = {
- {AuWbrCopyup_TDP, "tdp"},
- {AuWbrCopyup_TDP, "top-down-parent"},
- {AuWbrCopyup_BUP, "bup"},
- {AuWbrCopyup_BUP, "bottom-up-parent"},
- {AuWbrCopyup_BU, "bu"},
- {AuWbrCopyup_BU, "bottom-up"},
- {-1, NULL}
-};
-
-static int noinline_for_stack au_wbr_copyup_val(char *str)
-{
- substring_t args[MAX_OPT_ARGS];
-
- return match_token(str, au_wbr_copyup_policy, args);
-}
-
-const char *au_optstr_wbr_copyup(int wbr_copyup)
-{
- return au_parser_pattern(wbr_copyup, au_wbr_copyup_policy);
-}
-
-/* ---------------------------------------------------------------------- */
-
-static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
-
-static void dump_opts(struct au_opts *opts)
-{
-#ifdef CONFIG_AUFS_DEBUG
- /* reduce stack space */
- union {
- struct au_opt_add *add;
- struct au_opt_del *del;
- struct au_opt_mod *mod;
- struct au_opt_xino *xino;
- struct au_opt_xino_itrunc *xino_itrunc;
- struct au_opt_wbr_create *create;
- } u;
- struct au_opt *opt;
-
- opt = opts->opt;
- while (opt->type != Opt_tail) {
- switch (opt->type) {
- case Opt_add:
- u.add = &opt->add;
- AuDbg("add {b%d, %s, 0x%x, %p}\n",
- u.add->bindex, u.add->pathname, u.add->perm,
- u.add->path.dentry);
- break;
- case Opt_del:
- case Opt_idel:
- u.del = &opt->del;
- AuDbg("del {%s, %p}\n",
- u.del->pathname, u.del->h_path.dentry);
- break;
- case Opt_mod:
- case Opt_imod:
- u.mod = &opt->mod;
- AuDbg("mod {%s, 0x%x, %p}\n",
- u.mod->path, u.mod->perm, u.mod->h_root);
- break;
- case Opt_append:
- u.add = &opt->add;
- AuDbg("append {b%d, %s, 0x%x, %p}\n",
- u.add->bindex, u.add->pathname, u.add->perm,
- u.add->path.dentry);
- break;
- case Opt_prepend:
- u.add = &opt->add;
- AuDbg("prepend {b%d, %s, 0x%x, %p}\n",
- u.add->bindex, u.add->pathname, u.add->perm,
- u.add->path.dentry);
- break;
- case Opt_dirwh:
- AuDbg("dirwh %d\n", opt->dirwh);
- break;
- case Opt_rdcache:
- AuDbg("rdcache %d\n", opt->rdcache);
- break;
- case Opt_rdblk:
- AuDbg("rdblk %u\n", opt->rdblk);
- break;
- case Opt_rdblk_def:
- AuDbg("rdblk_def\n");
- break;
- case Opt_rdhash:
- AuDbg("rdhash %u\n", opt->rdhash);
- break;
- case Opt_rdhash_def:
- AuDbg("rdhash_def\n");
- break;
- case Opt_xino:
- u.xino = &opt->xino;
- AuDbg("xino {%s %pD}\n", u.xino->path, u.xino->file);
- break;
- case Opt_trunc_xino:
- AuLabel(trunc_xino);
- break;
- case Opt_notrunc_xino:
- AuLabel(notrunc_xino);
- break;
- case Opt_trunc_xino_path:
- case Opt_itrunc_xino:
- u.xino_itrunc = &opt->xino_itrunc;
- AuDbg("trunc_xino %d\n", u.xino_itrunc->bindex);
- break;
- case Opt_noxino:
- AuLabel(noxino);
- break;
- case Opt_trunc_xib:
- AuLabel(trunc_xib);
- break;
- case Opt_notrunc_xib:
- AuLabel(notrunc_xib);
- break;
- case Opt_shwh:
- AuLabel(shwh);
- break;
- case Opt_noshwh:
- AuLabel(noshwh);
- break;
- case Opt_dirperm1:
- AuLabel(dirperm1);
- break;
- case Opt_nodirperm1:
- AuLabel(nodirperm1);
- break;
- case Opt_plink:
- AuLabel(plink);
- break;
- case Opt_noplink:
- AuLabel(noplink);
- break;
- case Opt_list_plink:
- AuLabel(list_plink);
- break;
- case Opt_udba:
- AuDbg("udba %d, %s\n",
- opt->udba, au_optstr_udba(opt->udba));
- break;
- case Opt_dio:
- AuLabel(dio);
- break;
- case Opt_nodio:
- AuLabel(nodio);
- break;
- case Opt_diropq_a:
- AuLabel(diropq_a);
- break;
- case Opt_diropq_w:
- AuLabel(diropq_w);
- break;
- case Opt_warn_perm:
- AuLabel(warn_perm);
- break;
- case Opt_nowarn_perm:
- AuLabel(nowarn_perm);
- break;
- case Opt_verbose:
- AuLabel(verbose);
- break;
- case Opt_noverbose:
- AuLabel(noverbose);
- break;
- case Opt_sum:
- AuLabel(sum);
- break;
- case Opt_nosum:
- AuLabel(nosum);
- break;
- case Opt_wsum:
- AuLabel(wsum);
- break;
- case Opt_wbr_create:
- u.create = &opt->wbr_create;
- AuDbg("create %d, %s\n", u.create->wbr_create,
- au_optstr_wbr_create(u.create->wbr_create));
- switch (u.create->wbr_create) {
- case AuWbrCreate_MFSV:
- case AuWbrCreate_PMFSV:
- AuDbg("%d sec\n", u.create->mfs_second);
- break;
- case AuWbrCreate_MFSRR:
- AuDbg("%llu watermark\n",
- u.create->mfsrr_watermark);
- break;
- case AuWbrCreate_MFSRRV:
- case AuWbrCreate_PMFSRRV:
- AuDbg("%llu watermark, %d sec\n",
- u.create->mfsrr_watermark,
- u.create->mfs_second);
- break;
- }
- break;
- case Opt_wbr_copyup:
- AuDbg("copyup %d, %s\n", opt->wbr_copyup,
- au_optstr_wbr_copyup(opt->wbr_copyup));
- break;
- case Opt_fhsm_sec:
- AuDbg("fhsm_sec %u\n", opt->fhsm_second);
- break;
- case Opt_acl:
- AuLabel(acl);
- break;
- case Opt_noacl:
- AuLabel(noacl);
- break;
- default:
- BUG();
- }
- opt++;
- }
-#endif
-}
-
-void au_opts_free(struct au_opts *opts)
-{
- struct au_opt *opt;
-
- opt = opts->opt;
- while (opt->type != Opt_tail) {
- switch (opt->type) {
- case Opt_add:
- case Opt_append:
- case Opt_prepend:
- path_put(&opt->add.path);
- break;
- case Opt_del:
- case Opt_idel:
- path_put(&opt->del.h_path);
- break;
- case Opt_mod:
- case Opt_imod:
- dput(opt->mod.h_root);
- break;
- case Opt_xino:
- fput(opt->xino.file);
- break;
- }
- opt++;
- }
-}
-
-static int opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags,
- aufs_bindex_t bindex)
-{
- int err;
- struct au_opt_add *add = &opt->add;
- char *p;
-
- add->bindex = bindex;
- add->perm = AuBrPerm_RO;
- add->pathname = opt_str;
- p = strchr(opt_str, '=');
- if (p) {
- *p++ = 0;
- if (*p)
- add->perm = br_perm_val(p);
- }
-
- err = vfsub_kern_path(add->pathname, lkup_dirflags, &add->path);
- if (!err) {
- if (!p) {
- add->perm = AuBrPerm_RO;
- if (au_test_fs_rr(add->path.dentry->d_sb))
- add->perm = AuBrPerm_RR;
- else if (!bindex && !(sb_flags & MS_RDONLY))
- add->perm = AuBrPerm_RW;
- }
- opt->type = Opt_add;
- goto out;
- }
- pr_err("lookup failed %s (%d)\n", add->pathname, err);
- err = -EINVAL;
-
-out:
- return err;
-}
-
-static int au_opts_parse_del(struct au_opt_del *del, substring_t args[])
-{
- int err;
-
- del->pathname = args[0].from;
- AuDbg("del path %s\n", del->pathname);
-
- err = vfsub_kern_path(del->pathname, lkup_dirflags, &del->h_path);
- if (unlikely(err))
- pr_err("lookup failed %s (%d)\n", del->pathname, err);
-
- return err;
-}
-
-#if 0 /* reserved for future use */
-static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex,
- struct au_opt_del *del, substring_t args[])
-{
- int err;
- struct dentry *root;
-
- err = -EINVAL;
- root = sb->s_root;
- aufs_read_lock(root, AuLock_FLUSH);
- if (bindex < 0 || au_sbend(sb) < bindex) {
- pr_err("out of bounds, %d\n", bindex);
- goto out;
- }
-
- err = 0;
- del->h_path.dentry = dget(au_h_dptr(root, bindex));
- del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex));
-
-out:
- aufs_read_unlock(root, !AuLock_IR);
- return err;
-}
-#endif
-
-static int noinline_for_stack
-au_opts_parse_mod(struct au_opt_mod *mod, substring_t args[])
-{
- int err;
- struct path path;
- char *p;
-
- err = -EINVAL;
- mod->path = args[0].from;
- p = strchr(mod->path, '=');
- if (unlikely(!p)) {
- pr_err("no permssion %s\n", args[0].from);
- goto out;
- }
-
- *p++ = 0;
- err = vfsub_kern_path(mod->path, lkup_dirflags, &path);
- if (unlikely(err)) {
- pr_err("lookup failed %s (%d)\n", mod->path, err);
- goto out;
- }
-
- mod->perm = br_perm_val(p);
- AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p);
- mod->h_root = dget(path.dentry);
- path_put(&path);
-
-out:
- return err;
-}
-
-#if 0 /* reserved for future use */
-static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex,
- struct au_opt_mod *mod, substring_t args[])
-{
- int err;
- struct dentry *root;
-
- err = -EINVAL;
- root = sb->s_root;
- aufs_read_lock(root, AuLock_FLUSH);
- if (bindex < 0 || au_sbend(sb) < bindex) {
- pr_err("out of bounds, %d\n", bindex);
- goto out;
- }
-
- err = 0;
- mod->perm = br_perm_val(args[1].from);
- AuDbg("mod path %s, perm 0x%x, %s\n",
- mod->path, mod->perm, args[1].from);
- mod->h_root = dget(au_h_dptr(root, bindex));
-
-out:
- aufs_read_unlock(root, !AuLock_IR);
- return err;
-}
-#endif
-
-static int au_opts_parse_xino(struct super_block *sb, struct au_opt_xino *xino,
- substring_t args[])
-{
- int err;
- struct file *file;
-
- file = au_xino_create(sb, args[0].from, /*silent*/0);
- err = PTR_ERR(file);
- if (IS_ERR(file))
- goto out;
-
- err = -EINVAL;
- if (unlikely(file->f_path.dentry->d_sb == sb)) {
- fput(file);
- pr_err("%s must be outside\n", args[0].from);
- goto out;
- }
-
- err = 0;
- xino->file = file;
- xino->path = args[0].from;
-
-out:
- return err;
-}
-
-static int noinline_for_stack
-au_opts_parse_xino_itrunc_path(struct super_block *sb,
- struct au_opt_xino_itrunc *xino_itrunc,
- substring_t args[])
-{
- int err;
- aufs_bindex_t bend, bindex;
- struct path path;
- struct dentry *root;
-
- err = vfsub_kern_path(args[0].from, lkup_dirflags, &path);
- if (unlikely(err)) {
- pr_err("lookup failed %s (%d)\n", args[0].from, err);
- goto out;
- }
-
- xino_itrunc->bindex = -1;
- root = sb->s_root;
- aufs_read_lock(root, AuLock_FLUSH);
- bend = au_sbend(sb);
- for (bindex = 0; bindex <= bend; bindex++) {
- if (au_h_dptr(root, bindex) == path.dentry) {
- xino_itrunc->bindex = bindex;
- break;
- }
- }
- aufs_read_unlock(root, !AuLock_IR);
- path_put(&path);
-
- if (unlikely(xino_itrunc->bindex < 0)) {
- pr_err("no such branch %s\n", args[0].from);
- err = -EINVAL;
- }
-
-out:
- return err;
-}
-
-/* called without aufs lock */
-int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
-{
- int err, n, token;
- aufs_bindex_t bindex;
- unsigned char skipped;
- struct dentry *root;
- struct au_opt *opt, *opt_tail;
- char *opt_str;
- /* reduce the stack space */
- union {
- struct au_opt_xino_itrunc *xino_itrunc;
- struct au_opt_wbr_create *create;
- } u;
- struct {
- substring_t args[MAX_OPT_ARGS];
- } *a;
-
- err = -ENOMEM;
- a = kmalloc(sizeof(*a), GFP_NOFS);
- if (unlikely(!a))
- goto out;
-
- root = sb->s_root;
- err = 0;
- bindex = 0;
- opt = opts->opt;
- opt_tail = opt + opts->max_opt - 1;
- opt->type = Opt_tail;
- while (!err && (opt_str = strsep(&str, ",")) && *opt_str) {
- err = -EINVAL;
- skipped = 0;
- token = match_token(opt_str, options, a->args);
- switch (token) {
- case Opt_br:
- err = 0;
- while (!err && (opt_str = strsep(&a->args[0].from, ":"))
- && *opt_str) {
- err = opt_add(opt, opt_str, opts->sb_flags,
- bindex++);
- if (unlikely(!err && ++opt > opt_tail)) {
- err = -E2BIG;
- break;
- }
- opt->type = Opt_tail;
- skipped = 1;
- }
- break;
- case Opt_add:
- if (unlikely(match_int(&a->args[0], &n))) {
- pr_err("bad integer in %s\n", opt_str);
- break;
- }
- bindex = n;
- err = opt_add(opt, a->args[1].from, opts->sb_flags,
- bindex);
- if (!err)
- opt->type = token;
- break;
- case Opt_append:
- err = opt_add(opt, a->args[0].from, opts->sb_flags,
- /*dummy bindex*/1);
- if (!err)
- opt->type = token;
- break;
- case Opt_prepend:
- err = opt_add(opt, a->args[0].from, opts->sb_flags,
- /*bindex*/0);
- if (!err)
- opt->type = token;
- break;
- case Opt_del:
- err = au_opts_parse_del(&opt->del, a->args);
- if (!err)
- opt->type = token;
- break;
-#if 0 /* reserved for future use */
- case Opt_idel:
- del->pathname = "(indexed)";
- if (unlikely(match_int(&args[0], &n))) {
- pr_err("bad integer in %s\n", opt_str);
- break;
- }
- err = au_opts_parse_idel(sb, n, &opt->del, a->args);
- if (!err)
- opt->type = token;
- break;
-#endif
- case Opt_mod:
- err = au_opts_parse_mod(&opt->mod, a->args);
- if (!err)
- opt->type = token;
- break;
-#ifdef IMOD /* reserved for future use */
- case Opt_imod:
- u.mod->path = "(indexed)";
- if (unlikely(match_int(&a->args[0], &n))) {
- pr_err("bad integer in %s\n", opt_str);
- break;
- }
- err = au_opts_parse_imod(sb, n, &opt->mod, a->args);
- if (!err)
- opt->type = token;
- break;
-#endif
- case Opt_xino:
- err = au_opts_parse_xino(sb, &opt->xino, a->args);
- if (!err)
- opt->type = token;
- break;
-
- case Opt_trunc_xino_path:
- err = au_opts_parse_xino_itrunc_path
- (sb, &opt->xino_itrunc, a->args);
- if (!err)
- opt->type = token;
- break;
-
- case Opt_itrunc_xino:
- u.xino_itrunc = &opt->xino_itrunc;
- if (unlikely(match_int(&a->args[0], &n))) {
- pr_err("bad integer in %s\n", opt_str);
- break;
- }
- u.xino_itrunc->bindex = n;
- aufs_read_lock(root, AuLock_FLUSH);
- if (n < 0 || au_sbend(sb) < n) {
- pr_err("out of bounds, %d\n", n);
- aufs_read_unlock(root, !AuLock_IR);
- break;
- }
- aufs_read_unlock(root, !AuLock_IR);
- err = 0;
- opt->type = token;
- break;
-
- case Opt_dirwh:
- if (unlikely(match_int(&a->args[0], &opt->dirwh)))
- break;
- err = 0;
- opt->type = token;
- break;
-
- case Opt_rdcache:
- if (unlikely(match_int(&a->args[0], &n))) {
- pr_err("bad integer in %s\n", opt_str);
- break;
- }
- if (unlikely(n > AUFS_RDCACHE_MAX)) {
- pr_err("rdcache must be smaller than %d\n",
- AUFS_RDCACHE_MAX);
- break;
- }
- opt->rdcache = n;
- err = 0;
- opt->type = token;
- break;
- case Opt_rdblk:
- if (unlikely(match_int(&a->args[0], &n)
- || n < 0
- || n > KMALLOC_MAX_SIZE)) {
- pr_err("bad integer in %s\n", opt_str);
- break;
- }
- if (unlikely(n && n < NAME_MAX)) {
- pr_err("rdblk must be larger than %d\n",
- NAME_MAX);
- break;
- }
- opt->rdblk = n;
- err = 0;
- opt->type = token;
- break;
- case Opt_rdhash:
- if (unlikely(match_int(&a->args[0], &n)
- || n < 0
- || n * sizeof(struct hlist_head)
- > KMALLOC_MAX_SIZE)) {
- pr_err("bad integer in %s\n", opt_str);
- break;
- }
- opt->rdhash = n;
- err = 0;
- opt->type = token;
- break;
-
- case Opt_trunc_xino:
- case Opt_notrunc_xino:
- case Opt_noxino:
- case Opt_trunc_xib:
- case Opt_notrunc_xib:
- case Opt_shwh:
- case Opt_noshwh:
- case Opt_dirperm1:
- case Opt_nodirperm1:
- case Opt_plink:
- case Opt_noplink:
- case Opt_list_plink:
- case Opt_dio:
- case Opt_nodio:
- case Opt_diropq_a:
- case Opt_diropq_w:
- case Opt_warn_perm:
- case Opt_nowarn_perm:
- case Opt_verbose:
- case Opt_noverbose:
- case Opt_sum:
- case Opt_nosum:
- case Opt_wsum:
- case Opt_rdblk_def:
- case Opt_rdhash_def:
- case Opt_acl:
- case Opt_noacl:
- err = 0;
- opt->type = token;
- break;
-
- case Opt_udba:
- opt->udba = udba_val(a->args[0].from);
- if (opt->udba >= 0) {
- err = 0;
- opt->type = token;
- } else
- pr_err("wrong value, %s\n", opt_str);
- break;
-
- case Opt_wbr_create:
- u.create = &opt->wbr_create;
- u.create->wbr_create
- = au_wbr_create_val(a->args[0].from, u.create);
- if (u.create->wbr_create >= 0) {
- err = 0;
- opt->type = token;
- } else
- pr_err("wrong value, %s\n", opt_str);
- break;
- case Opt_wbr_copyup:
- opt->wbr_copyup = au_wbr_copyup_val(a->args[0].from);
- if (opt->wbr_copyup >= 0) {
- err = 0;
- opt->type = token;
- } else
- pr_err("wrong value, %s\n", opt_str);
- break;
-
- case Opt_fhsm_sec:
- if (unlikely(match_int(&a->args[0], &n)
- || n < 0)) {
- pr_err("bad integer in %s\n", opt_str);
- break;
- }
- if (sysaufs_brs) {
- opt->fhsm_second = n;
- opt->type = token;
- } else
- pr_warn("ignored %s\n", opt_str);
- err = 0;
- break;
-
- case Opt_ignore:
- pr_warn("ignored %s\n", opt_str);
- /*FALLTHROUGH*/
- case Opt_ignore_silent:
- skipped = 1;
- err = 0;
- break;
- case Opt_err:
- pr_err("unknown option %s\n", opt_str);
- break;
- }
-
- if (!err && !skipped) {
- if (unlikely(++opt > opt_tail)) {
- err = -E2BIG;
- opt--;
- opt->type = Opt_tail;
- break;
- }
- opt->type = Opt_tail;
- }
- }
-
- kfree(a);
- dump_opts(opts);
- if (unlikely(err))
- au_opts_free(opts);
-
-out:
- return err;
-}
-
-static int au_opt_wbr_create(struct super_block *sb,
- struct au_opt_wbr_create *create)
-{
- int err;
- struct au_sbinfo *sbinfo;
-
- SiMustWriteLock(sb);
-
- err = 1; /* handled */
- sbinfo = au_sbi(sb);
- if (sbinfo->si_wbr_create_ops->fin) {
- err = sbinfo->si_wbr_create_ops->fin(sb);
- if (!err)
- err = 1;
- }
-
- sbinfo->si_wbr_create = create->wbr_create;
- sbinfo->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create;
- switch (create->wbr_create) {
- case AuWbrCreate_MFSRRV:
- case AuWbrCreate_MFSRR:
- case AuWbrCreate_PMFSRR:
- case AuWbrCreate_PMFSRRV:
- sbinfo->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark;
- /*FALLTHROUGH*/
- case AuWbrCreate_MFS:
- case AuWbrCreate_MFSV:
- case AuWbrCreate_PMFS:
- case AuWbrCreate_PMFSV:
- sbinfo->si_wbr_mfs.mfs_expire
- = msecs_to_jiffies(create->mfs_second * MSEC_PER_SEC);
- break;
- }
-
- if (sbinfo->si_wbr_create_ops->init)
- sbinfo->si_wbr_create_ops->init(sb); /* ignore */
-
- return err;
-}
-
-/*
- * returns,
- * plus: processed without an error
- * zero: unprocessed
- */
-static int au_opt_simple(struct super_block *sb, struct au_opt *opt,
- struct au_opts *opts)
-{
- int err;
- struct au_sbinfo *sbinfo;
-
- SiMustWriteLock(sb);
-
- err = 1; /* handled */
- sbinfo = au_sbi(sb);
- switch (opt->type) {
- case Opt_udba:
- sbinfo->si_mntflags &= ~AuOptMask_UDBA;
- sbinfo->si_mntflags |= opt->udba;
- opts->given_udba |= opt->udba;
- break;
-
- case Opt_plink:
- au_opt_set(sbinfo->si_mntflags, PLINK);
- break;
- case Opt_noplink:
- if (au_opt_test(sbinfo->si_mntflags, PLINK))
- au_plink_put(sb, /*verbose*/1);
- au_opt_clr(sbinfo->si_mntflags, PLINK);
- break;
- case Opt_list_plink:
- if (au_opt_test(sbinfo->si_mntflags, PLINK))
- au_plink_list(sb);
- break;
-
- case Opt_dio:
- au_opt_set(sbinfo->si_mntflags, DIO);
- au_fset_opts(opts->flags, REFRESH_DYAOP);
- break;
- case Opt_nodio:
- au_opt_clr(sbinfo->si_mntflags, DIO);
- au_fset_opts(opts->flags, REFRESH_DYAOP);
- break;
-
- case Opt_fhsm_sec:
- au_fhsm_set(sbinfo, opt->fhsm_second);
- break;
-
- case Opt_diropq_a:
- au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ);
- break;
- case Opt_diropq_w:
- au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ);
- break;
-
- case Opt_warn_perm:
- au_opt_set(sbinfo->si_mntflags, WARN_PERM);
- break;
- case Opt_nowarn_perm:
- au_opt_clr(sbinfo->si_mntflags, WARN_PERM);
- break;
-
- case Opt_verbose:
- au_opt_set(sbinfo->si_mntflags, VERBOSE);
- break;
- case Opt_noverbose:
- au_opt_clr(sbinfo->si_mntflags, VERBOSE);
- break;
-
- case Opt_sum:
- au_opt_set(sbinfo->si_mntflags, SUM);
- break;
- case Opt_wsum:
- au_opt_clr(sbinfo->si_mntflags, SUM);
- au_opt_set(sbinfo->si_mntflags, SUM_W);
- case Opt_nosum:
- au_opt_clr(sbinfo->si_mntflags, SUM);
- au_opt_clr(sbinfo->si_mntflags, SUM_W);
- break;
-
- case Opt_wbr_create:
- err = au_opt_wbr_create(sb, &opt->wbr_create);
- break;
- case Opt_wbr_copyup:
- sbinfo->si_wbr_copyup = opt->wbr_copyup;
- sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup;
- break;
-
- case Opt_dirwh:
- sbinfo->si_dirwh = opt->dirwh;
- break;
-
- case Opt_rdcache:
- sbinfo->si_rdcache
- = msecs_to_jiffies(opt->rdcache * MSEC_PER_SEC);
- break;
- case Opt_rdblk:
- sbinfo->si_rdblk = opt->rdblk;
- break;
- case Opt_rdblk_def:
- sbinfo->si_rdblk = AUFS_RDBLK_DEF;
- break;
- case Opt_rdhash:
- sbinfo->si_rdhash = opt->rdhash;
- break;
- case Opt_rdhash_def:
- sbinfo->si_rdhash = AUFS_RDHASH_DEF;
- break;
-
- case Opt_shwh:
- au_opt_set(sbinfo->si_mntflags, SHWH);
- break;
- case Opt_noshwh:
- au_opt_clr(sbinfo->si_mntflags, SHWH);
- break;
-
- case Opt_dirperm1:
- au_opt_set(sbinfo->si_mntflags, DIRPERM1);
- break;
- case Opt_nodirperm1:
- au_opt_clr(sbinfo->si_mntflags, DIRPERM1);
- break;
-
- case Opt_trunc_xino:
- au_opt_set(sbinfo->si_mntflags, TRUNC_XINO);
- break;
- case Opt_notrunc_xino:
- au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO);
- break;
-
- case Opt_trunc_xino_path:
- case Opt_itrunc_xino:
- err = au_xino_trunc(sb, opt->xino_itrunc.bindex);
- if (!err)
- err = 1;
- break;
-
- case Opt_trunc_xib:
- au_fset_opts(opts->flags, TRUNC_XIB);
- break;
- case Opt_notrunc_xib:
- au_fclr_opts(opts->flags, TRUNC_XIB);
- break;
-
- case Opt_acl:
- sb->s_flags |= MS_POSIXACL;
- break;
- case Opt_noacl:
- sb->s_flags &= ~MS_POSIXACL;
- break;
-
- default:
- err = 0;
- break;
- }
-
- return err;
-}
-
-/*
- * returns tri-state.
- * plus: processed without an error
- * zero: unprocessed
- * minus: error
- */
-static int au_opt_br(struct super_block *sb, struct au_opt *opt,
- struct au_opts *opts)
-{
- int err, do_refresh;
-
- err = 0;
- switch (opt->type) {
- case Opt_append:
- opt->add.bindex = au_sbend(sb) + 1;
- if (opt->add.bindex < 0)
- opt->add.bindex = 0;
- goto add;
- case Opt_prepend:
- opt->add.bindex = 0;
- add: /* indented label */
- case Opt_add:
- err = au_br_add(sb, &opt->add,
- au_ftest_opts(opts->flags, REMOUNT));
- if (!err) {
- err = 1;
- au_fset_opts(opts->flags, REFRESH);
- }
- break;
-
- case Opt_del:
- case Opt_idel:
- err = au_br_del(sb, &opt->del,
- au_ftest_opts(opts->flags, REMOUNT));
- if (!err) {
- err = 1;
- au_fset_opts(opts->flags, TRUNC_XIB);
- au_fset_opts(opts->flags, REFRESH);
- }
- break;
-
- case Opt_mod:
- case Opt_imod:
- err = au_br_mod(sb, &opt->mod,
- au_ftest_opts(opts->flags, REMOUNT),
- &do_refresh);
- if (!err) {
- err = 1;
- if (do_refresh)
- au_fset_opts(opts->flags, REFRESH);
- }
- break;
- }
-
- return err;
-}
-
-static int au_opt_xino(struct super_block *sb, struct au_opt *opt,
- struct au_opt_xino **opt_xino,
- struct au_opts *opts)
-{
- int err;
- aufs_bindex_t bend, bindex;
- struct dentry *root, *parent, *h_root;
-
- err = 0;
- switch (opt->type) {
- case Opt_xino:
- err = au_xino_set(sb, &opt->xino,
- !!au_ftest_opts(opts->flags, REMOUNT));
- if (unlikely(err))
- break;
-
- *opt_xino = &opt->xino;
- au_xino_brid_set(sb, -1);
-
- /* safe d_parent access */
- parent = opt->xino.file->f_path.dentry->d_parent;
- root = sb->s_root;
- bend = au_sbend(sb);
- for (bindex = 0; bindex <= bend; bindex++) {
- h_root = au_h_dptr(root, bindex);
- if (h_root == parent) {
- au_xino_brid_set(sb, au_sbr_id(sb, bindex));
- break;
- }
- }
- break;
-
- case Opt_noxino:
- au_xino_clr(sb);
- au_xino_brid_set(sb, -1);
- *opt_xino = (void *)-1;
- break;
- }
-
- return err;
-}
-
-int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
- unsigned int pending)
-{
- int err, fhsm;
- aufs_bindex_t bindex, bend;
- unsigned char do_plink, skip, do_free, can_no_dreval;
- struct au_branch *br;
- struct au_wbr *wbr;
- struct dentry *root, *dentry;
- struct inode *dir, *h_dir;
- struct au_sbinfo *sbinfo;
- struct au_hinode *hdir;
-
- SiMustAnyLock(sb);
-
- sbinfo = au_sbi(sb);
- AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA));
-
- if (!(sb_flags & MS_RDONLY)) {
- if (unlikely(!au_br_writable(au_sbr_perm(sb, 0))))
- pr_warn("first branch should be rw\n");
- if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH)))
- pr_warn("shwh should be used with ro\n");
- }
-
- if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HNOTIFY)
- && !au_opt_test(sbinfo->si_mntflags, XINO))
- pr_warn("udba=*notify requires xino\n");
-
- if (au_opt_test(sbinfo->si_mntflags, DIRPERM1))
- pr_warn("dirperm1 breaks the protection"
- " by the permission bits on the lower branch\n");
-
- err = 0;
- fhsm = 0;
- root = sb->s_root;
- dir = d_inode(root);
- do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK);
- can_no_dreval = !!au_opt_test((sbinfo->si_mntflags | pending),
- UDBA_NONE);
- bend = au_sbend(sb);
- for (bindex = 0; !err && bindex <= bend; bindex++) {
- skip = 0;
- h_dir = au_h_iptr(dir, bindex);
- br = au_sbr(sb, bindex);
-
- if ((br->br_perm & AuBrAttr_ICEX)
- && !h_dir->i_op->listxattr)
- br->br_perm &= ~AuBrAttr_ICEX;
-#if 0
- if ((br->br_perm & AuBrAttr_ICEX_SEC)
- && (au_br_sb(br)->s_flags & MS_NOSEC))
- br->br_perm &= ~AuBrAttr_ICEX_SEC;
-#endif
-
- do_free = 0;
- wbr = br->br_wbr;
- if (wbr)
- wbr_wh_read_lock(wbr);
-
- if (!au_br_writable(br->br_perm)) {
- do_free = !!wbr;
- skip = (!wbr
- || (!wbr->wbr_whbase
- && !wbr->wbr_plink
- && !wbr->wbr_orph));
- } else if (!au_br_wh_linkable(br->br_perm)) {
- /* skip = (!br->br_whbase && !br->br_orph); */
- skip = (!wbr || !wbr->wbr_whbase);
- if (skip && wbr) {
- if (do_plink)
- skip = !!wbr->wbr_plink;
- else
- skip = !wbr->wbr_plink;
- }
- } else {
- /* skip = (br->br_whbase && br->br_ohph); */
- skip = (wbr && wbr->wbr_whbase);
- if (skip) {
- if (do_plink)
- skip = !!wbr->wbr_plink;
- else
- skip = !wbr->wbr_plink;
- }
- }
- if (wbr)
- wbr_wh_read_unlock(wbr);
-
- if (can_no_dreval) {
- dentry = br->br_path.dentry;
- spin_lock(&dentry->d_lock);
- if (dentry->d_flags &
- (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE))
- can_no_dreval = 0;
- spin_unlock(&dentry->d_lock);
- }
-
- if (au_br_fhsm(br->br_perm)) {
- fhsm++;
- AuDebugOn(!br->br_fhsm);
- }
-
- if (skip)
- continue;
-
- hdir = au_hi(dir, bindex);
- au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
- if (wbr)
- wbr_wh_write_lock(wbr);
- err = au_wh_init(br, sb);
- if (wbr)
- wbr_wh_write_unlock(wbr);
- au_hn_imtx_unlock(hdir);
-
- if (!err && do_free) {
- kfree(wbr);
- br->br_wbr = NULL;
- }
- }
-
- if (can_no_dreval)
- au_fset_si(sbinfo, NO_DREVAL);
- else
- au_fclr_si(sbinfo, NO_DREVAL);
-
- if (fhsm >= 2) {
- au_fset_si(sbinfo, FHSM);
- for (bindex = bend; bindex >= 0; bindex--) {
- br = au_sbr(sb, bindex);
- if (au_br_fhsm(br->br_perm)) {
- au_fhsm_set_bottom(sb, bindex);
- break;
- }
- }
- } else {
- au_fclr_si(sbinfo, FHSM);
- au_fhsm_set_bottom(sb, -1);
- }
-
- return err;
-}
-
-int au_opts_mount(struct super_block *sb, struct au_opts *opts)
-{
- int err;
- unsigned int tmp;
- aufs_bindex_t bindex, bend;
- struct au_opt *opt;
- struct au_opt_xino *opt_xino, xino;
- struct au_sbinfo *sbinfo;
- struct au_branch *br;
- struct inode *dir;
-
- SiMustWriteLock(sb);
-
- err = 0;
- opt_xino = NULL;
- opt = opts->opt;
- while (err >= 0 && opt->type != Opt_tail)
- err = au_opt_simple(sb, opt++, opts);
- if (err > 0)
- err = 0;
- else if (unlikely(err < 0))
- goto out;
-
- /* disable xino and udba temporary */
- sbinfo = au_sbi(sb);
- tmp = sbinfo->si_mntflags;
- au_opt_clr(sbinfo->si_mntflags, XINO);
- au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL);
-
- opt = opts->opt;
- while (err >= 0 && opt->type != Opt_tail)
- err = au_opt_br(sb, opt++, opts);
- if (err > 0)
- err = 0;
- else if (unlikely(err < 0))
- goto out;
-
- bend = au_sbend(sb);
- if (unlikely(bend < 0)) {
- err = -EINVAL;
- pr_err("no branches\n");
- goto out;
- }
-
- if (au_opt_test(tmp, XINO))
- au_opt_set(sbinfo->si_mntflags, XINO);
- opt = opts->opt;
- while (!err && opt->type != Opt_tail)
- err = au_opt_xino(sb, opt++, &opt_xino, opts);
- if (unlikely(err))
- goto out;
-
- err = au_opts_verify(sb, sb->s_flags, tmp);
- if (unlikely(err))
- goto out;
-
- /* restore xino */
- if (au_opt_test(tmp, XINO) && !opt_xino) {
- xino.file = au_xino_def(sb);
- err = PTR_ERR(xino.file);
- if (IS_ERR(xino.file))
- goto out;
-
- err = au_xino_set(sb, &xino, /*remount*/0);
- fput(xino.file);
- if (unlikely(err))
- goto out;
- }
-
- /* restore udba */
- tmp &= AuOptMask_UDBA;
- sbinfo->si_mntflags &= ~AuOptMask_UDBA;
- sbinfo->si_mntflags |= tmp;
- bend = au_sbend(sb);
- for (bindex = 0; bindex <= bend; bindex++) {
- br = au_sbr(sb, bindex);
- err = au_hnotify_reset_br(tmp, br, br->br_perm);
- if (unlikely(err))
- AuIOErr("hnotify failed on br %d, %d, ignored\n",
- bindex, err);
- /* go on even if err */
- }
- if (au_opt_test(tmp, UDBA_HNOTIFY)) {
- dir = d_inode(sb->s_root);
- au_hn_reset(dir, au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO);
- }
-
-out:
- return err;
-}
-
-int au_opts_remount(struct super_block *sb, struct au_opts *opts)
-{
- int err, rerr;
- unsigned char no_dreval;
- struct inode *dir;
- struct au_opt_xino *opt_xino;
- struct au_opt *opt;
- struct au_sbinfo *sbinfo;
-
- SiMustWriteLock(sb);
-
- err = 0;
- dir = d_inode(sb->s_root);
- sbinfo = au_sbi(sb);
- opt_xino = NULL;
- opt = opts->opt;
- while (err >= 0 && opt->type != Opt_tail) {
- err = au_opt_simple(sb, opt, opts);
- if (!err)
- err = au_opt_br(sb, opt, opts);
- if (!err)
- err = au_opt_xino(sb, opt, &opt_xino, opts);
- opt++;
- }
- if (err > 0)
- err = 0;
- AuTraceErr(err);
- /* go on even err */
-
- no_dreval = !!au_ftest_si(sbinfo, NO_DREVAL);
- rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0);
- if (unlikely(rerr && !err))
- err = rerr;
-
- if (no_dreval != !!au_ftest_si(sbinfo, NO_DREVAL))
- au_fset_opts(opts->flags, REFRESH_IDOP);
-
- if (au_ftest_opts(opts->flags, TRUNC_XIB)) {
- rerr = au_xib_trunc(sb);
- if (unlikely(rerr && !err))
- err = rerr;
- }
-
- /* will be handled by the caller */
- if (!au_ftest_opts(opts->flags, REFRESH)
- && (opts->given_udba
- || au_opt_test(sbinfo->si_mntflags, XINO)
- || au_ftest_opts(opts->flags, REFRESH_IDOP)
- ))
- au_fset_opts(opts->flags, REFRESH);
-
- AuDbg("status 0x%x\n", opts->flags);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-unsigned int au_opt_udba(struct super_block *sb)
-{
- return au_mntflags(sb) & AuOptMask_UDBA;
-}
diff --git a/fs/aufs/opts.h b/fs/aufs/opts.h
deleted file mode 100644
index 0d6c2e1c7..000000000
--- a/fs/aufs/opts.h
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * mount options/flags
- */
-
-#ifndef __AUFS_OPTS_H__
-#define __AUFS_OPTS_H__
-
-#ifdef __KERNEL__
-
-#include <linux/path.h>
-
-struct file;
-struct super_block;
-
-/* ---------------------------------------------------------------------- */
-
-/* mount flags */
-#define AuOpt_XINO 1 /* external inode number bitmap
- and translation table */
-#define AuOpt_TRUNC_XINO (1 << 1) /* truncate xino files */
-#define AuOpt_UDBA_NONE (1 << 2) /* users direct branch access */
-#define AuOpt_UDBA_REVAL (1 << 3)
-#define AuOpt_UDBA_HNOTIFY (1 << 4)
-#define AuOpt_SHWH (1 << 5) /* show whiteout */
-#define AuOpt_PLINK (1 << 6) /* pseudo-link */
-#define AuOpt_DIRPERM1 (1 << 7) /* ignore the lower dir's perm
- bits */
-#define AuOpt_ALWAYS_DIROPQ (1 << 9) /* policy to creating diropq */
-#define AuOpt_SUM (1 << 10) /* summation for statfs(2) */
-#define AuOpt_SUM_W (1 << 11) /* unimplemented */
-#define AuOpt_WARN_PERM (1 << 12) /* warn when add-branch */
-#define AuOpt_VERBOSE (1 << 13) /* busy inode when del-branch */
-#define AuOpt_DIO (1 << 14) /* direct io */
-
-#ifndef CONFIG_AUFS_HNOTIFY
-#undef AuOpt_UDBA_HNOTIFY
-#define AuOpt_UDBA_HNOTIFY 0
-#endif
-#ifndef CONFIG_AUFS_SHWH
-#undef AuOpt_SHWH
-#define AuOpt_SHWH 0
-#endif
-
-#define AuOpt_Def (AuOpt_XINO \
- | AuOpt_UDBA_REVAL \
- | AuOpt_PLINK \
- /* | AuOpt_DIRPERM1 */ \
- | AuOpt_WARN_PERM)
-#define AuOptMask_UDBA (AuOpt_UDBA_NONE \
- | AuOpt_UDBA_REVAL \
- | AuOpt_UDBA_HNOTIFY)
-
-#define au_opt_test(flags, name) (flags & AuOpt_##name)
-#define au_opt_set(flags, name) do { \
- BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \
- ((flags) |= AuOpt_##name); \
-} while (0)
-#define au_opt_set_udba(flags, name) do { \
- (flags) &= ~AuOptMask_UDBA; \
- ((flags) |= AuOpt_##name); \
-} while (0)
-#define au_opt_clr(flags, name) do { \
- ((flags) &= ~AuOpt_##name); \
-} while (0)
-
-static inline unsigned int au_opts_plink(unsigned int mntflags)
-{
-#ifdef CONFIG_PROC_FS
- return mntflags;
-#else
- return mntflags & ~AuOpt_PLINK;
-#endif
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* policies to select one among multiple writable branches */
-enum {
- AuWbrCreate_TDP, /* top down parent */
- AuWbrCreate_RR, /* round robin */
- AuWbrCreate_MFS, /* most free space */
- AuWbrCreate_MFSV, /* mfs with seconds */
- AuWbrCreate_MFSRR, /* mfs then rr */
- AuWbrCreate_MFSRRV, /* mfs then rr with seconds */
- AuWbrCreate_PMFS, /* parent and mfs */
- AuWbrCreate_PMFSV, /* parent and mfs with seconds */
- AuWbrCreate_PMFSRR, /* parent, mfs and round-robin */
- AuWbrCreate_PMFSRRV, /* plus seconds */
-
- AuWbrCreate_Def = AuWbrCreate_TDP
-};
-
-enum {
- AuWbrCopyup_TDP, /* top down parent */
- AuWbrCopyup_BUP, /* bottom up parent */
- AuWbrCopyup_BU, /* bottom up */
-
- AuWbrCopyup_Def = AuWbrCopyup_TDP
-};
-
-/* ---------------------------------------------------------------------- */
-
-struct au_opt_add {
- aufs_bindex_t bindex;
- char *pathname;
- int perm;
- struct path path;
-};
-
-struct au_opt_del {
- char *pathname;
- struct path h_path;
-};
-
-struct au_opt_mod {
- char *path;
- int perm;
- struct dentry *h_root;
-};
-
-struct au_opt_xino {
- char *path;
- struct file *file;
-};
-
-struct au_opt_xino_itrunc {
- aufs_bindex_t bindex;
-};
-
-struct au_opt_wbr_create {
- int wbr_create;
- int mfs_second;
- unsigned long long mfsrr_watermark;
-};
-
-struct au_opt {
- int type;
- union {
- struct au_opt_xino xino;
- struct au_opt_xino_itrunc xino_itrunc;
- struct au_opt_add add;
- struct au_opt_del del;
- struct au_opt_mod mod;
- int dirwh;
- int rdcache;
- unsigned int rdblk;
- unsigned int rdhash;
- int udba;
- struct au_opt_wbr_create wbr_create;
- int wbr_copyup;
- unsigned int fhsm_second;
- };
-};
-
-/* opts flags */
-#define AuOpts_REMOUNT 1
-#define AuOpts_REFRESH (1 << 1)
-#define AuOpts_TRUNC_XIB (1 << 2)
-#define AuOpts_REFRESH_DYAOP (1 << 3)
-#define AuOpts_REFRESH_IDOP (1 << 4)
-#define au_ftest_opts(flags, name) ((flags) & AuOpts_##name)
-#define au_fset_opts(flags, name) \
- do { (flags) |= AuOpts_##name; } while (0)
-#define au_fclr_opts(flags, name) \
- do { (flags) &= ~AuOpts_##name; } while (0)
-
-struct au_opts {
- struct au_opt *opt;
- int max_opt;
-
- unsigned int given_udba;
- unsigned int flags;
- unsigned long sb_flags;
-};
-
-/* ---------------------------------------------------------------------- */
-
-/* opts.c */
-void au_optstr_br_perm(au_br_perm_str_t *str, int perm);
-const char *au_optstr_udba(int udba);
-const char *au_optstr_wbr_copyup(int wbr_copyup);
-const char *au_optstr_wbr_create(int wbr_create);
-
-void au_opts_free(struct au_opts *opts);
-int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts);
-int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
- unsigned int pending);
-int au_opts_mount(struct super_block *sb, struct au_opts *opts);
-int au_opts_remount(struct super_block *sb, struct au_opts *opts);
-
-unsigned int au_opt_udba(struct super_block *sb);
-
-#endif /* __KERNEL__ */
-#endif /* __AUFS_OPTS_H__ */
diff --git a/fs/aufs/plink.c b/fs/aufs/plink.c
deleted file mode 100644
index 6fdab1e0e..000000000
--- a/fs/aufs/plink.c
+++ /dev/null
@@ -1,515 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * pseudo-link
- */
-
-#include "aufs.h"
-
-/*
- * the pseudo-link maintenance mode.
- * during a user process maintains the pseudo-links,
- * prohibit adding a new plink and branch manipulation.
- *
- * Flags
- * NOPLM:
- * For entry functions which will handle plink, and i_mutex is already held
- * in VFS.
- * They cannot wait and should return an error at once.
- * Callers has to check the error.
- * NOPLMW:
- * For entry functions which will handle plink, but i_mutex is not held
- * in VFS.
- * They can wait the plink maintenance mode to finish.
- *
- * They behave like F_SETLK and F_SETLKW.
- * If the caller never handle plink, then both flags are unnecessary.
- */
-
-int au_plink_maint(struct super_block *sb, int flags)
-{
- int err;
- pid_t pid, ppid;
- struct au_sbinfo *sbi;
-
- SiMustAnyLock(sb);
-
- err = 0;
- if (!au_opt_test(au_mntflags(sb), PLINK))
- goto out;
-
- sbi = au_sbi(sb);
- pid = sbi->si_plink_maint_pid;
- if (!pid || pid == current->pid)
- goto out;
-
- /* todo: it highly depends upon /sbin/mount.aufs */
- rcu_read_lock();
- ppid = task_pid_vnr(rcu_dereference(current->real_parent));
- rcu_read_unlock();
- if (pid == ppid)
- goto out;
-
- if (au_ftest_lock(flags, NOPLMW)) {
- /* if there is no i_mutex lock in VFS, we don't need to wait */
- /* AuDebugOn(!lockdep_depth(current)); */
- while (sbi->si_plink_maint_pid) {
- si_read_unlock(sb);
- /* gave up wake_up_bit() */
- wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid);
-
- if (au_ftest_lock(flags, FLUSH))
- au_nwt_flush(&sbi->si_nowait);
- si_noflush_read_lock(sb);
- }
- } else if (au_ftest_lock(flags, NOPLM)) {
- AuDbg("ppid %d, pid %d\n", ppid, pid);
- err = -EAGAIN;
- }
-
-out:
- return err;
-}
-
-void au_plink_maint_leave(struct au_sbinfo *sbinfo)
-{
- spin_lock(&sbinfo->si_plink_maint_lock);
- sbinfo->si_plink_maint_pid = 0;
- spin_unlock(&sbinfo->si_plink_maint_lock);
- wake_up_all(&sbinfo->si_plink_wq);
-}
-
-int au_plink_maint_enter(struct super_block *sb)
-{
- int err;
- struct au_sbinfo *sbinfo;
-
- err = 0;
- sbinfo = au_sbi(sb);
- /* make sure i am the only one in this fs */
- si_write_lock(sb, AuLock_FLUSH);
- if (au_opt_test(au_mntflags(sb), PLINK)) {
- spin_lock(&sbinfo->si_plink_maint_lock);
- if (!sbinfo->si_plink_maint_pid)
- sbinfo->si_plink_maint_pid = current->pid;
- else
- err = -EBUSY;
- spin_unlock(&sbinfo->si_plink_maint_lock);
- }
- si_write_unlock(sb);
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-#ifdef CONFIG_AUFS_DEBUG
-void au_plink_list(struct super_block *sb)
-{
- int i;
- struct au_sbinfo *sbinfo;
- struct hlist_head *plink_hlist;
- struct pseudo_link *plink;
-
- SiMustAnyLock(sb);
-
- sbinfo = au_sbi(sb);
- AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
- AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
-
- for (i = 0; i < AuPlink_NHASH; i++) {
- plink_hlist = &sbinfo->si_plink[i].head;
- rcu_read_lock();
- hlist_for_each_entry_rcu(plink, plink_hlist, hlist)
- AuDbg("%lu\n", plink->inode->i_ino);
- rcu_read_unlock();
- }
-}
-#endif
-
-/* is the inode pseudo-linked? */
-int au_plink_test(struct inode *inode)
-{
- int found, i;
- struct au_sbinfo *sbinfo;
- struct hlist_head *plink_hlist;
- struct pseudo_link *plink;
-
- sbinfo = au_sbi(inode->i_sb);
- AuRwMustAnyLock(&sbinfo->si_rwsem);
- AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
- AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
-
- found = 0;
- i = au_plink_hash(inode->i_ino);
- plink_hlist = &sbinfo->si_plink[i].head;
- rcu_read_lock();
- hlist_for_each_entry_rcu(plink, plink_hlist, hlist)
- if (plink->inode == inode) {
- found = 1;
- break;
- }
- rcu_read_unlock();
- return found;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * generate a name for plink.
- * the file will be stored under AUFS_WH_PLINKDIR.
- */
-/* 20 is max digits length of ulong 64 */
-#define PLINK_NAME_LEN ((20 + 1) * 2)
-
-static int plink_name(char *name, int len, struct inode *inode,
- aufs_bindex_t bindex)
-{
- int rlen;
- struct inode *h_inode;
-
- h_inode = au_h_iptr(inode, bindex);
- rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
- return rlen;
-}
-
-struct au_do_plink_lkup_args {
- struct dentry **errp;
- struct qstr *tgtname;
- struct dentry *h_parent;
- struct au_branch *br;
-};
-
-static struct dentry *au_do_plink_lkup(struct qstr *tgtname,
- struct dentry *h_parent,
- struct au_branch *br)
-{
- struct dentry *h_dentry;
- struct mutex *h_mtx;
-
- h_mtx = &d_inode(h_parent)->i_mutex;
- mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
- h_dentry = vfsub_lkup_one(tgtname, h_parent);
- mutex_unlock(h_mtx);
- return h_dentry;
-}
-
-static void au_call_do_plink_lkup(void *args)
-{
- struct au_do_plink_lkup_args *a = args;
- *a->errp = au_do_plink_lkup(a->tgtname, a->h_parent, a->br);
-}
-
-/* lookup the plink-ed @inode under the branch at @bindex */
-struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
-{
- struct dentry *h_dentry, *h_parent;
- struct au_branch *br;
- int wkq_err;
- char a[PLINK_NAME_LEN];
- struct qstr tgtname = QSTR_INIT(a, 0);
-
- AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
-
- br = au_sbr(inode->i_sb, bindex);
- h_parent = br->br_wbr->wbr_plink;
- tgtname.len = plink_name(a, sizeof(a), inode, bindex);
-
- if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
- struct au_do_plink_lkup_args args = {
- .errp = &h_dentry,
- .tgtname = &tgtname,
- .h_parent = h_parent,
- .br = br
- };
-
- wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args);
- if (unlikely(wkq_err))
- h_dentry = ERR_PTR(wkq_err);
- } else
- h_dentry = au_do_plink_lkup(&tgtname, h_parent, br);
-
- return h_dentry;
-}
-
-/* create a pseudo-link */
-static int do_whplink(struct qstr *tgt, struct dentry *h_parent,
- struct dentry *h_dentry, struct au_branch *br)
-{
- int err;
- struct path h_path = {
- .mnt = au_br_mnt(br)
- };
- struct inode *h_dir, *delegated;
-
- h_dir = d_inode(h_parent);
- mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_CHILD2);
-again:
- h_path.dentry = vfsub_lkup_one(tgt, h_parent);
- err = PTR_ERR(h_path.dentry);
- if (IS_ERR(h_path.dentry))
- goto out;
-
- err = 0;
- /* wh.plink dir is not monitored */
- /* todo: is it really safe? */
- if (d_is_positive(h_path.dentry)
- && d_inode(h_path.dentry) != d_inode(h_dentry)) {
- delegated = NULL;
- err = vfsub_unlink(h_dir, &h_path, &delegated, /*force*/0);
- if (unlikely(err == -EWOULDBLOCK)) {
- pr_warn("cannot retry for NFSv4 delegation"
- " for an internal unlink\n");
- iput(delegated);
- }
- dput(h_path.dentry);
- h_path.dentry = NULL;
- if (!err)
- goto again;
- }
- if (!err && d_is_negative(h_path.dentry)) {
- delegated = NULL;
- err = vfsub_link(h_dentry, h_dir, &h_path, &delegated);
- if (unlikely(err == -EWOULDBLOCK)) {
- pr_warn("cannot retry for NFSv4 delegation"
- " for an internal link\n");
- iput(delegated);
- }
- }
- dput(h_path.dentry);
-
-out:
- mutex_unlock(&h_dir->i_mutex);
- return err;
-}
-
-struct do_whplink_args {
- int *errp;
- struct qstr *tgt;
- struct dentry *h_parent;
- struct dentry *h_dentry;
- struct au_branch *br;
-};
-
-static void call_do_whplink(void *args)
-{
- struct do_whplink_args *a = args;
- *a->errp = do_whplink(a->tgt, a->h_parent, a->h_dentry, a->br);
-}
-
-static int whplink(struct dentry *h_dentry, struct inode *inode,
- aufs_bindex_t bindex, struct au_branch *br)
-{
- int err, wkq_err;
- struct au_wbr *wbr;
- struct dentry *h_parent;
- char a[PLINK_NAME_LEN];
- struct qstr tgtname = QSTR_INIT(a, 0);
-
- wbr = au_sbr(inode->i_sb, bindex)->br_wbr;
- h_parent = wbr->wbr_plink;
- tgtname.len = plink_name(a, sizeof(a), inode, bindex);
-
- /* always superio. */
- if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
- struct do_whplink_args args = {
- .errp = &err,
- .tgt = &tgtname,
- .h_parent = h_parent,
- .h_dentry = h_dentry,
- .br = br
- };
- wkq_err = au_wkq_wait(call_do_whplink, &args);
- if (unlikely(wkq_err))
- err = wkq_err;
- } else
- err = do_whplink(&tgtname, h_parent, h_dentry, br);
-
- return err;
-}
-
-/* free a single plink */
-static void do_put_plink(struct pseudo_link *plink, int do_del)
-{
- if (do_del)
- hlist_del(&plink->hlist);
- iput(plink->inode);
- kfree(plink);
-}
-
-static void do_put_plink_rcu(struct rcu_head *rcu)
-{
- struct pseudo_link *plink;
-
- plink = container_of(rcu, struct pseudo_link, rcu);
- iput(plink->inode);
- kfree(plink);
-}
-
-/*
- * create a new pseudo-link for @h_dentry on @bindex.
- * the linked inode is held in aufs @inode.
- */
-void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
- struct dentry *h_dentry)
-{
- struct super_block *sb;
- struct au_sbinfo *sbinfo;
- struct hlist_head *plink_hlist;
- struct pseudo_link *plink, *tmp;
- struct au_sphlhead *sphl;
- int found, err, cnt, i;
-
- sb = inode->i_sb;
- sbinfo = au_sbi(sb);
- AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
- AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
-
- found = au_plink_test(inode);
- if (found)
- return;
-
- i = au_plink_hash(inode->i_ino);
- sphl = sbinfo->si_plink + i;
- plink_hlist = &sphl->head;
- tmp = kmalloc(sizeof(*plink), GFP_NOFS);
- if (tmp)
- tmp->inode = au_igrab(inode);
- else {
- err = -ENOMEM;
- goto out;
- }
-
- spin_lock(&sphl->spin);
- hlist_for_each_entry(plink, plink_hlist, hlist) {
- if (plink->inode == inode) {
- found = 1;
- break;
- }
- }
- if (!found)
- hlist_add_head_rcu(&tmp->hlist, plink_hlist);
- spin_unlock(&sphl->spin);
- if (!found) {
- cnt = au_sphl_count(sphl);
-#define msg "unexpectedly unblanced or too many pseudo-links"
- if (cnt > AUFS_PLINK_WARN)
- AuWarn1(msg ", %d\n", cnt);
-#undef msg
- err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex));
- } else {
- do_put_plink(tmp, 0);
- return;
- }
-
-out:
- if (unlikely(err)) {
- pr_warn("err %d, damaged pseudo link.\n", err);
- if (tmp) {
- au_sphl_del_rcu(&tmp->hlist, sphl);
- call_rcu(&tmp->rcu, do_put_plink_rcu);
- }
- }
-}
-
-/* free all plinks */
-void au_plink_put(struct super_block *sb, int verbose)
-{
- int i, warned;
- struct au_sbinfo *sbinfo;
- struct hlist_head *plink_hlist;
- struct hlist_node *tmp;
- struct pseudo_link *plink;
-
- SiMustWriteLock(sb);
-
- sbinfo = au_sbi(sb);
- AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
- AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
-
- /* no spin_lock since sbinfo is write-locked */
- warned = 0;
- for (i = 0; i < AuPlink_NHASH; i++) {
- plink_hlist = &sbinfo->si_plink[i].head;
- if (!warned && verbose && !hlist_empty(plink_hlist)) {
- pr_warn("pseudo-link is not flushed");
- warned = 1;
- }
- hlist_for_each_entry_safe(plink, tmp, plink_hlist, hlist)
- do_put_plink(plink, 0);
- INIT_HLIST_HEAD(plink_hlist);
- }
-}
-
-void au_plink_clean(struct super_block *sb, int verbose)
-{
- struct dentry *root;
-
- root = sb->s_root;
- aufs_write_lock(root);
- if (au_opt_test(au_mntflags(sb), PLINK))
- au_plink_put(sb, verbose);
- aufs_write_unlock(root);
-}
-
-static int au_plink_do_half_refresh(struct inode *inode, aufs_bindex_t br_id)
-{
- int do_put;
- aufs_bindex_t bstart, bend, bindex;
-
- do_put = 0;
- bstart = au_ibstart(inode);
- bend = au_ibend(inode);
- if (bstart >= 0) {
- for (bindex = bstart; bindex <= bend; bindex++) {
- if (!au_h_iptr(inode, bindex)
- || au_ii_br_id(inode, bindex) != br_id)
- continue;
- au_set_h_iptr(inode, bindex, NULL, 0);
- do_put = 1;
- break;
- }
- if (do_put)
- for (bindex = bstart; bindex <= bend; bindex++)
- if (au_h_iptr(inode, bindex)) {
- do_put = 0;
- break;
- }
- } else
- do_put = 1;
-
- return do_put;
-}
-
-/* free the plinks on a branch specified by @br_id */
-void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
-{
- struct au_sbinfo *sbinfo;
- struct hlist_head *plink_hlist;
- struct hlist_node *tmp;
- struct pseudo_link *plink;
- struct inode *inode;
- int i, do_put;
-
- SiMustWriteLock(sb);
-
- sbinfo = au_sbi(sb);
- AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
- AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
-
- /* no spin_lock since sbinfo is write-locked */
- for (i = 0; i < AuPlink_NHASH; i++) {
- plink_hlist = &sbinfo->si_plink[i].head;
- hlist_for_each_entry_safe(plink, tmp, plink_hlist, hlist) {
- inode = au_igrab(plink->inode);
- ii_write_lock_child(inode);
- do_put = au_plink_do_half_refresh(inode, br_id);
- if (do_put)
- do_put_plink(plink, 1);
- ii_write_unlock(inode);
- iput(inode);
- }
- }
-}
diff --git a/fs/aufs/poll.c b/fs/aufs/poll.c
deleted file mode 100644
index dd2baf5dc..000000000
--- a/fs/aufs/poll.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * poll operation
- * There is only one filesystem which implements ->poll operation, currently.
- */
-
-#include "aufs.h"
-
-unsigned int aufs_poll(struct file *file, poll_table *wait)
-{
- unsigned int mask;
- int err;
- struct file *h_file;
- struct super_block *sb;
-
- /* We should pretend an error happened. */
- mask = POLLERR /* | POLLIN | POLLOUT */;
- sb = file->f_path.dentry->d_sb;
- si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
-
- h_file = au_read_pre(file, /*keep_fi*/0);
- err = PTR_ERR(h_file);
- if (IS_ERR(h_file))
- goto out;
-
- /* it is not an error if h_file has no operation */
- mask = DEFAULT_POLLMASK;
- if (h_file->f_op->poll)
- mask = h_file->f_op->poll(h_file, wait);
- fput(h_file); /* instead of au_read_post() */
-
-out:
- si_read_unlock(sb);
- AuTraceErr((int)mask);
- return mask;
-}
diff --git a/fs/aufs/posix_acl.c b/fs/aufs/posix_acl.c
deleted file mode 100644
index a3c442c08..000000000
--- a/fs/aufs/posix_acl.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (C) 2014-2016 Junjiro R. Okajima
- */
-
-/*
- * posix acl operations
- */
-
-#include <linux/fs.h>
-#include "aufs.h"
-
-struct posix_acl *aufs_get_acl(struct inode *inode, int type)
-{
- struct posix_acl *acl;
- int err;
- aufs_bindex_t bindex;
- struct inode *h_inode;
- struct super_block *sb;
-
- acl = NULL;
- sb = inode->i_sb;
- si_read_lock(sb, AuLock_FLUSH);
- ii_read_lock_child(inode);
- if (!(sb->s_flags & MS_POSIXACL))
- goto out;
-
- bindex = au_ibstart(inode);
- h_inode = au_h_iptr(inode, bindex);
- if (unlikely(!h_inode
- || ((h_inode->i_mode & S_IFMT)
- != (inode->i_mode & S_IFMT)))) {
- err = au_busy_or_stale();
- acl = ERR_PTR(err);
- goto out;
- }
-
- /* always topmost only */
- acl = get_acl(h_inode, type);
-
-out:
- ii_read_unlock(inode);
- si_read_unlock(sb);
-
- AuTraceErrPtr(acl);
- return acl;
-}
-
-int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
-{
- int err;
- ssize_t ssz;
- struct dentry *dentry;
- struct au_srxattr arg = {
- .type = AU_ACL_SET,
- .u.acl_set = {
- .acl = acl,
- .type = type
- },
- };
-
- mutex_lock(&inode->i_mutex);
- if (inode->i_ino == AUFS_ROOT_INO)
- dentry = dget(inode->i_sb->s_root);
- else {
- dentry = d_find_alias(inode);
- if (!dentry)
- dentry = d_find_any_alias(inode);
- if (!dentry) {
- pr_warn("cannot handle this inode, "
- "please report to aufs-users ML\n");
- err = -ENOENT;
- goto out;
- }
- }
-
- ssz = au_srxattr(dentry, &arg);
- dput(dentry);
- err = ssz;
- if (ssz >= 0)
- err = 0;
-
-out:
- mutex_unlock(&inode->i_mutex);
- return err;
-}
diff --git a/fs/aufs/procfs.c b/fs/aufs/procfs.c
deleted file mode 100644
index 2c8893edf..000000000
--- a/fs/aufs/procfs.c
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (C) 2010-2016 Junjiro R. Okajima
- */
-
-/*
- * procfs interfaces
- */
-
-#include <linux/proc_fs.h>
-#include "aufs.h"
-
-static int au_procfs_plm_release(struct inode *inode, struct file *file)
-{
- struct au_sbinfo *sbinfo;
-
- sbinfo = file->private_data;
- if (sbinfo) {
- au_plink_maint_leave(sbinfo);
- kobject_put(&sbinfo->si_kobj);
- }
-
- return 0;
-}
-
-static void au_procfs_plm_write_clean(struct file *file)
-{
- struct au_sbinfo *sbinfo;
-
- sbinfo = file->private_data;
- if (sbinfo)
- au_plink_clean(sbinfo->si_sb, /*verbose*/0);
-}
-
-static int au_procfs_plm_write_si(struct file *file, unsigned long id)
-{
- int err;
- struct super_block *sb;
- struct au_sbinfo *sbinfo;
-
- err = -EBUSY;
- if (unlikely(file->private_data))
- goto out;
-
- sb = NULL;
- /* don't use au_sbilist_lock() here */
- spin_lock(&au_sbilist.spin);
- list_for_each_entry(sbinfo, &au_sbilist.head, si_list)
- if (id == sysaufs_si_id(sbinfo)) {
- kobject_get(&sbinfo->si_kobj);
- sb = sbinfo->si_sb;
- break;
- }
- spin_unlock(&au_sbilist.spin);
-
- err = -EINVAL;
- if (unlikely(!sb))
- goto out;
-
- err = au_plink_maint_enter(sb);
- if (!err)
- /* keep kobject_get() */
- file->private_data = sbinfo;
- else
- kobject_put(&sbinfo->si_kobj);
-out:
- return err;
-}
-
-/*
- * Accept a valid "si=xxxx" only.
- * Once it is accepted successfully, accept "clean" too.
- */
-static ssize_t au_procfs_plm_write(struct file *file, const char __user *ubuf,
- size_t count, loff_t *ppos)
-{
- ssize_t err;
- unsigned long id;
- /* last newline is allowed */
- char buf[3 + sizeof(unsigned long) * 2 + 1];
-
- err = -EACCES;
- if (unlikely(!capable(CAP_SYS_ADMIN)))
- goto out;
-
- err = -EINVAL;
- if (unlikely(count > sizeof(buf)))
- goto out;
-
- err = copy_from_user(buf, ubuf, count);
- if (unlikely(err)) {
- err = -EFAULT;
- goto out;
- }
- buf[count] = 0;
-
- err = -EINVAL;
- if (!strcmp("clean", buf)) {
- au_procfs_plm_write_clean(file);
- goto out_success;
- } else if (unlikely(strncmp("si=", buf, 3)))
- goto out;
-
- err = kstrtoul(buf + 3, 16, &id);
- if (unlikely(err))
- goto out;
-
- err = au_procfs_plm_write_si(file, id);
- if (unlikely(err))
- goto out;
-
-out_success:
- err = count; /* success */
-out:
- return err;
-}
-
-static const struct file_operations au_procfs_plm_fop = {
- .write = au_procfs_plm_write,
- .release = au_procfs_plm_release,
- .owner = THIS_MODULE
-};
-
-/* ---------------------------------------------------------------------- */
-
-static struct proc_dir_entry *au_procfs_dir;
-
-void au_procfs_fin(void)
-{
- remove_proc_entry(AUFS_PLINK_MAINT_NAME, au_procfs_dir);
- remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
-}
-
-int __init au_procfs_init(void)
-{
- int err;
- struct proc_dir_entry *entry;
-
- err = -ENOMEM;
- au_procfs_dir = proc_mkdir(AUFS_PLINK_MAINT_DIR, NULL);
- if (unlikely(!au_procfs_dir))
- goto out;
-
- entry = proc_create(AUFS_PLINK_MAINT_NAME, S_IFREG | S_IWUSR,
- au_procfs_dir, &au_procfs_plm_fop);
- if (unlikely(!entry))
- goto out_dir;
-
- err = 0;
- goto out; /* success */
-
-
-out_dir:
- remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
-out:
- return err;
-}
diff --git a/fs/aufs/rdu.c b/fs/aufs/rdu.c
deleted file mode 100644
index a9e9e9893..000000000
--- a/fs/aufs/rdu.c
+++ /dev/null
@@ -1,375 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * readdir in userspace.
- */
-
-#include <linux/compat.h>
-#include <linux/fs_stack.h>
-#include <linux/security.h>
-#include "aufs.h"
-
-/* bits for struct aufs_rdu.flags */
-#define AuRdu_CALLED 1
-#define AuRdu_CONT (1 << 1)
-#define AuRdu_FULL (1 << 2)
-#define au_ftest_rdu(flags, name) ((flags) & AuRdu_##name)
-#define au_fset_rdu(flags, name) \
- do { (flags) |= AuRdu_##name; } while (0)
-#define au_fclr_rdu(flags, name) \
- do { (flags) &= ~AuRdu_##name; } while (0)
-
-struct au_rdu_arg {
- struct dir_context ctx;
- struct aufs_rdu *rdu;
- union au_rdu_ent_ul ent;
- unsigned long end;
-
- struct super_block *sb;
- int err;
-};
-
-static int au_rdu_fill(struct dir_context *ctx, const char *name, int nlen,
- loff_t offset, u64 h_ino, unsigned int d_type)
-{
- int err, len;
- struct au_rdu_arg *arg = container_of(ctx, struct au_rdu_arg, ctx);
- struct aufs_rdu *rdu = arg->rdu;
- struct au_rdu_ent ent;
-
- err = 0;
- arg->err = 0;
- au_fset_rdu(rdu->cookie.flags, CALLED);
- len = au_rdu_len(nlen);
- if (arg->ent.ul + len < arg->end) {
- ent.ino = h_ino;
- ent.bindex = rdu->cookie.bindex;
- ent.type = d_type;
- ent.nlen = nlen;
- if (unlikely(nlen > AUFS_MAX_NAMELEN))
- ent.type = DT_UNKNOWN;
-
- /* unnecessary to support mmap_sem since this is a dir */
- err = -EFAULT;
- if (copy_to_user(arg->ent.e, &ent, sizeof(ent)))
- goto out;
- if (copy_to_user(arg->ent.e->name, name, nlen))
- goto out;
- /* the terminating NULL */
- if (__put_user(0, arg->ent.e->name + nlen))
- goto out;
- err = 0;
- /* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */
- arg->ent.ul += len;
- rdu->rent++;
- } else {
- err = -EFAULT;
- au_fset_rdu(rdu->cookie.flags, FULL);
- rdu->full = 1;
- rdu->tail = arg->ent;
- }
-
-out:
- /* AuTraceErr(err); */
- return err;
-}
-
-static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg)
-{
- int err;
- loff_t offset;
- struct au_rdu_cookie *cookie = &arg->rdu->cookie;
-
- /* we don't have to care (FMODE_32BITHASH | FMODE_64BITHASH) for ext4 */
- offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET);
- err = offset;
- if (unlikely(offset != cookie->h_pos))
- goto out;
-
- err = 0;
- do {
- arg->err = 0;
- au_fclr_rdu(cookie->flags, CALLED);
- /* smp_mb(); */
- err = vfsub_iterate_dir(h_file, &arg->ctx);
- if (err >= 0)
- err = arg->err;
- } while (!err
- && au_ftest_rdu(cookie->flags, CALLED)
- && !au_ftest_rdu(cookie->flags, FULL));
- cookie->h_pos = h_file->f_pos;
-
-out:
- AuTraceErr(err);
- return err;
-}
-
-static int au_rdu(struct file *file, struct aufs_rdu *rdu)
-{
- int err;
- aufs_bindex_t bend;
- struct au_rdu_arg arg = {
- .ctx = {
- .actor = au_rdu_fill
- }
- };
- struct dentry *dentry;
- struct inode *inode;
- struct file *h_file;
- struct au_rdu_cookie *cookie = &rdu->cookie;
-
- err = !access_ok(VERIFY_WRITE, rdu->ent.e, rdu->sz);
- if (unlikely(err)) {
- err = -EFAULT;
- AuTraceErr(err);
- goto out;
- }
- rdu->rent = 0;
- rdu->tail = rdu->ent;
- rdu->full = 0;
- arg.rdu = rdu;
- arg.ent = rdu->ent;
- arg.end = arg.ent.ul;
- arg.end += rdu->sz;
-
- err = -ENOTDIR;
- if (unlikely(!file->f_op->iterate))
- goto out;
-
- err = security_file_permission(file, MAY_READ);
- AuTraceErr(err);
- if (unlikely(err))
- goto out;
-
- dentry = file->f_path.dentry;
- inode = d_inode(dentry);
-#if 1
- mutex_lock(&inode->i_mutex);
-#else
- err = mutex_lock_killable(&inode->i_mutex);
- AuTraceErr(err);
- if (unlikely(err))
- goto out;
-#endif
-
- arg.sb = inode->i_sb;
- err = si_read_lock(arg.sb, AuLock_FLUSH | AuLock_NOPLM);
- if (unlikely(err))
- goto out_mtx;
- err = au_alive_dir(dentry);
- if (unlikely(err))
- goto out_si;
- /* todo: reval? */
- fi_read_lock(file);
-
- err = -EAGAIN;
- if (unlikely(au_ftest_rdu(cookie->flags, CONT)
- && cookie->generation != au_figen(file)))
- goto out_unlock;
-
- err = 0;
- if (!rdu->blk) {
- rdu->blk = au_sbi(arg.sb)->si_rdblk;
- if (!rdu->blk)
- rdu->blk = au_dir_size(file, /*dentry*/NULL);
- }
- bend = au_fbstart(file);
- if (cookie->bindex < bend)
- cookie->bindex = bend;
- bend = au_fbend_dir(file);
- /* AuDbg("b%d, b%d\n", cookie->bindex, bend); */
- for (; !err && cookie->bindex <= bend;
- cookie->bindex++, cookie->h_pos = 0) {
- h_file = au_hf_dir(file, cookie->bindex);
- if (!h_file)
- continue;
-
- au_fclr_rdu(cookie->flags, FULL);
- err = au_rdu_do(h_file, &arg);
- AuTraceErr(err);
- if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err))
- break;
- }
- AuDbg("rent %llu\n", rdu->rent);
-
- if (!err && !au_ftest_rdu(cookie->flags, CONT)) {
- rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH);
- au_fset_rdu(cookie->flags, CONT);
- cookie->generation = au_figen(file);
- }
-
- ii_read_lock_child(inode);
- fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibstart(inode)));
- ii_read_unlock(inode);
-
-out_unlock:
- fi_read_unlock(file);
-out_si:
- si_read_unlock(arg.sb);
-out_mtx:
- mutex_unlock(&inode->i_mutex);
-out:
- AuTraceErr(err);
- return err;
-}
-
-static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu)
-{
- int err;
- ino_t ino;
- unsigned long long nent;
- union au_rdu_ent_ul *u;
- struct au_rdu_ent ent;
- struct super_block *sb;
-
- err = 0;
- nent = rdu->nent;
- u = &rdu->ent;
- sb = file->f_path.dentry->d_sb;
- si_read_lock(sb, AuLock_FLUSH);
- while (nent-- > 0) {
- /* unnecessary to support mmap_sem since this is a dir */
- err = copy_from_user(&ent, u->e, sizeof(ent));
- if (!err)
- err = !access_ok(VERIFY_WRITE, &u->e->ino, sizeof(ino));
- if (unlikely(err)) {
- err = -EFAULT;
- AuTraceErr(err);
- break;
- }
-
- /* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */
- if (!ent.wh)
- err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino);
- else
- err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type,
- &ino);
- if (unlikely(err)) {
- AuTraceErr(err);
- break;
- }
-
- err = __put_user(ino, &u->e->ino);
- if (unlikely(err)) {
- err = -EFAULT;
- AuTraceErr(err);
- break;
- }
- u->ul += au_rdu_len(ent.nlen);
- }
- si_read_unlock(sb);
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int au_rdu_verify(struct aufs_rdu *rdu)
-{
- AuDbg("rdu{%llu, %p, %u | %u | %llu, %u, %u | "
- "%llu, b%d, 0x%x, g%u}\n",
- rdu->sz, rdu->ent.e, rdu->verify[AufsCtlRduV_SZ],
- rdu->blk,
- rdu->rent, rdu->shwh, rdu->full,
- rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags,
- rdu->cookie.generation);
-
- if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu))
- return 0;
-
- AuDbg("%u:%u\n",
- rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu));
- return -EINVAL;
-}
-
-long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- long err, e;
- struct aufs_rdu rdu;
- void __user *p = (void __user *)arg;
-
- err = copy_from_user(&rdu, p, sizeof(rdu));
- if (unlikely(err)) {
- err = -EFAULT;
- AuTraceErr(err);
- goto out;
- }
- err = au_rdu_verify(&rdu);
- if (unlikely(err))
- goto out;
-
- switch (cmd) {
- case AUFS_CTL_RDU:
- err = au_rdu(file, &rdu);
- if (unlikely(err))
- break;
-
- e = copy_to_user(p, &rdu, sizeof(rdu));
- if (unlikely(e)) {
- err = -EFAULT;
- AuTraceErr(err);
- }
- break;
- case AUFS_CTL_RDU_INO:
- err = au_rdu_ino(file, &rdu);
- break;
-
- default:
- /* err = -ENOTTY; */
- err = -EINVAL;
- }
-
-out:
- AuTraceErr(err);
- return err;
-}
-
-#ifdef CONFIG_COMPAT
-long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- long err, e;
- struct aufs_rdu rdu;
- void __user *p = compat_ptr(arg);
-
- /* todo: get_user()? */
- err = copy_from_user(&rdu, p, sizeof(rdu));
- if (unlikely(err)) {
- err = -EFAULT;
- AuTraceErr(err);
- goto out;
- }
- rdu.ent.e = compat_ptr(rdu.ent.ul);
- err = au_rdu_verify(&rdu);
- if (unlikely(err))
- goto out;
-
- switch (cmd) {
- case AUFS_CTL_RDU:
- err = au_rdu(file, &rdu);
- if (unlikely(err))
- break;
-
- rdu.ent.ul = ptr_to_compat(rdu.ent.e);
- rdu.tail.ul = ptr_to_compat(rdu.tail.e);
- e = copy_to_user(p, &rdu, sizeof(rdu));
- if (unlikely(e)) {
- err = -EFAULT;
- AuTraceErr(err);
- }
- break;
- case AUFS_CTL_RDU_INO:
- err = au_rdu_ino(file, &rdu);
- break;
-
- default:
- /* err = -ENOTTY; */
- err = -EINVAL;
- }
-
-out:
- AuTraceErr(err);
- return err;
-}
-#endif
diff --git a/fs/aufs/rwsem.h b/fs/aufs/rwsem.h
deleted file mode 100644
index ef50c2ccb..000000000
--- a/fs/aufs/rwsem.h
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * simple read-write semaphore wrappers
- */
-
-#ifndef __AUFS_RWSEM_H__
-#define __AUFS_RWSEM_H__
-
-#ifdef __KERNEL__
-
-#include "debug.h"
-
-struct au_rwsem {
- struct rw_semaphore rwsem;
-#ifdef CONFIG_AUFS_DEBUG
- /* just for debugging, not almighty counter */
- atomic_t rcnt, wcnt;
-#endif
-};
-
-#ifdef CONFIG_AUFS_DEBUG
-#define AuDbgCntInit(rw) do { \
- atomic_set(&(rw)->rcnt, 0); \
- atomic_set(&(rw)->wcnt, 0); \
- smp_mb(); /* atomic set */ \
-} while (0)
-
-#define AuDbgRcntInc(rw) atomic_inc(&(rw)->rcnt)
-#define AuDbgRcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->rcnt) < 0)
-#define AuDbgWcntInc(rw) atomic_inc(&(rw)->wcnt)
-#define AuDbgWcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->wcnt) < 0)
-#else
-#define AuDbgCntInit(rw) do {} while (0)
-#define AuDbgRcntInc(rw) do {} while (0)
-#define AuDbgRcntDec(rw) do {} while (0)
-#define AuDbgWcntInc(rw) do {} while (0)
-#define AuDbgWcntDec(rw) do {} while (0)
-#endif /* CONFIG_AUFS_DEBUG */
-
-/* to debug easier, do not make them inlined functions */
-#define AuRwMustNoWaiters(rw) AuDebugOn(!list_empty(&(rw)->rwsem.wait_list))
-/* rwsem_is_locked() is unusable */
-#define AuRwMustReadLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0)
-#define AuRwMustWriteLock(rw) AuDebugOn(atomic_read(&(rw)->wcnt) <= 0)
-#define AuRwMustAnyLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0 \
- && atomic_read(&(rw)->wcnt) <= 0)
-#define AuRwDestroy(rw) AuDebugOn(atomic_read(&(rw)->rcnt) \
- || atomic_read(&(rw)->wcnt))
-
-#define au_rw_class(rw, key) lockdep_set_class(&(rw)->rwsem, key)
-
-static inline void au_rw_init(struct au_rwsem *rw)
-{
- AuDbgCntInit(rw);
- init_rwsem(&rw->rwsem);
-}
-
-static inline void au_rw_init_wlock(struct au_rwsem *rw)
-{
- au_rw_init(rw);
- down_write(&rw->rwsem);
- AuDbgWcntInc(rw);
-}
-
-static inline void au_rw_init_wlock_nested(struct au_rwsem *rw,
- unsigned int lsc)
-{
- au_rw_init(rw);
- down_write_nested(&rw->rwsem, lsc);
- AuDbgWcntInc(rw);
-}
-
-static inline void au_rw_read_lock(struct au_rwsem *rw)
-{
- down_read(&rw->rwsem);
- AuDbgRcntInc(rw);
-}
-
-static inline void au_rw_read_lock_nested(struct au_rwsem *rw, unsigned int lsc)
-{
- down_read_nested(&rw->rwsem, lsc);
- AuDbgRcntInc(rw);
-}
-
-static inline void au_rw_read_unlock(struct au_rwsem *rw)
-{
- AuRwMustReadLock(rw);
- AuDbgRcntDec(rw);
- up_read(&rw->rwsem);
-}
-
-static inline void au_rw_dgrade_lock(struct au_rwsem *rw)
-{
- AuRwMustWriteLock(rw);
- AuDbgRcntInc(rw);
- AuDbgWcntDec(rw);
- downgrade_write(&rw->rwsem);
-}
-
-static inline void au_rw_write_lock(struct au_rwsem *rw)
-{
- down_write(&rw->rwsem);
- AuDbgWcntInc(rw);
-}
-
-static inline void au_rw_write_lock_nested(struct au_rwsem *rw,
- unsigned int lsc)
-{
- down_write_nested(&rw->rwsem, lsc);
- AuDbgWcntInc(rw);
-}
-
-static inline void au_rw_write_unlock(struct au_rwsem *rw)
-{
- AuRwMustWriteLock(rw);
- AuDbgWcntDec(rw);
- up_write(&rw->rwsem);
-}
-
-/* why is not _nested version defined */
-static inline int au_rw_read_trylock(struct au_rwsem *rw)
-{
- int ret;
-
- ret = down_read_trylock(&rw->rwsem);
- if (ret)
- AuDbgRcntInc(rw);
- return ret;
-}
-
-static inline int au_rw_write_trylock(struct au_rwsem *rw)
-{
- int ret;
-
- ret = down_write_trylock(&rw->rwsem);
- if (ret)
- AuDbgWcntInc(rw);
- return ret;
-}
-
-#undef AuDbgCntInit
-#undef AuDbgRcntInc
-#undef AuDbgRcntDec
-#undef AuDbgWcntInc
-#undef AuDbgWcntDec
-
-#define AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
-static inline void prefix##_read_lock(param) \
-{ au_rw_read_lock(rwsem); } \
-static inline void prefix##_write_lock(param) \
-{ au_rw_write_lock(rwsem); } \
-static inline int prefix##_read_trylock(param) \
-{ return au_rw_read_trylock(rwsem); } \
-static inline int prefix##_write_trylock(param) \
-{ return au_rw_write_trylock(rwsem); }
-/* why is not _nested version defined */
-/* static inline void prefix##_read_trylock_nested(param, lsc)
-{ au_rw_read_trylock_nested(rwsem, lsc)); }
-static inline void prefix##_write_trylock_nestd(param, lsc)
-{ au_rw_write_trylock_nested(rwsem, lsc); } */
-
-#define AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) \
-static inline void prefix##_read_unlock(param) \
-{ au_rw_read_unlock(rwsem); } \
-static inline void prefix##_write_unlock(param) \
-{ au_rw_write_unlock(rwsem); } \
-static inline void prefix##_downgrade_lock(param) \
-{ au_rw_dgrade_lock(rwsem); }
-
-#define AuSimpleRwsemFuncs(prefix, param, rwsem) \
- AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
- AuSimpleUnlockRwsemFuncs(prefix, param, rwsem)
-
-#endif /* __KERNEL__ */
-#endif /* __AUFS_RWSEM_H__ */
diff --git a/fs/aufs/sbinfo.c b/fs/aufs/sbinfo.c
deleted file mode 100644
index e3c58f643..000000000
--- a/fs/aufs/sbinfo.c
+++ /dev/null
@@ -1,353 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * superblock private data
- */
-
-#include "aufs.h"
-
-/*
- * they are necessary regardless sysfs is disabled.
- */
-void au_si_free(struct kobject *kobj)
-{
- int i;
- struct au_sbinfo *sbinfo;
- char *locked __maybe_unused; /* debug only */
-
- sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
- for (i = 0; i < AuPlink_NHASH; i++)
- AuDebugOn(!hlist_empty(&sbinfo->si_plink[i].head));
- AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len));
-
- AuDebugOn(!hlist_empty(&sbinfo->si_symlink.head));
-
- au_rw_write_lock(&sbinfo->si_rwsem);
- au_br_free(sbinfo);
- au_rw_write_unlock(&sbinfo->si_rwsem);
-
- AuDebugOn(radix_tree_gang_lookup
- (&sbinfo->au_si_pid.tree, (void **)&locked,
- /*first_index*/PID_MAX_DEFAULT - 1,
- /*max_items*/sizeof(locked)/sizeof(*locked)));
-
- kfree(sbinfo->si_branch);
- kfree(sbinfo->au_si_pid.bitmap);
- mutex_destroy(&sbinfo->si_xib_mtx);
- AuRwDestroy(&sbinfo->si_rwsem);
-
- kfree(sbinfo);
-}
-
-int au_si_alloc(struct super_block *sb)
-{
- int err, i;
- struct au_sbinfo *sbinfo;
- static struct lock_class_key aufs_si;
-
- err = -ENOMEM;
- sbinfo = kzalloc(sizeof(*sbinfo), GFP_NOFS);
- if (unlikely(!sbinfo))
- goto out;
-
- BUILD_BUG_ON(sizeof(unsigned long) !=
- sizeof(*sbinfo->au_si_pid.bitmap));
- sbinfo->au_si_pid.bitmap = kcalloc(BITS_TO_LONGS(PID_MAX_DEFAULT),
- sizeof(*sbinfo->au_si_pid.bitmap),
- GFP_NOFS);
- if (unlikely(!sbinfo->au_si_pid.bitmap))
- goto out_sbinfo;
-
- /* will be reallocated separately */
- sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS);
- if (unlikely(!sbinfo->si_branch))
- goto out_pidmap;
-
- err = sysaufs_si_init(sbinfo);
- if (unlikely(err))
- goto out_br;
-
- au_nwt_init(&sbinfo->si_nowait);
- au_rw_init_wlock(&sbinfo->si_rwsem);
- au_rw_class(&sbinfo->si_rwsem, &aufs_si);
- spin_lock_init(&sbinfo->au_si_pid.tree_lock);
- INIT_RADIX_TREE(&sbinfo->au_si_pid.tree, GFP_ATOMIC | __GFP_NOFAIL);
-
- atomic_long_set(&sbinfo->si_ninodes, 0);
- atomic_long_set(&sbinfo->si_nfiles, 0);
-
- sbinfo->si_bend = -1;
- sbinfo->si_last_br_id = AUFS_BRANCH_MAX / 2;
-
- sbinfo->si_wbr_copyup = AuWbrCopyup_Def;
- sbinfo->si_wbr_create = AuWbrCreate_Def;
- sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + sbinfo->si_wbr_copyup;
- sbinfo->si_wbr_create_ops = au_wbr_create_ops + sbinfo->si_wbr_create;
-
- au_fhsm_init(sbinfo);
-
- sbinfo->si_mntflags = au_opts_plink(AuOpt_Def);
-
- au_sphl_init(&sbinfo->si_symlink);
-
- sbinfo->si_xino_jiffy = jiffies;
- sbinfo->si_xino_expire
- = msecs_to_jiffies(AUFS_XINO_DEF_SEC * MSEC_PER_SEC);
- mutex_init(&sbinfo->si_xib_mtx);
- sbinfo->si_xino_brid = -1;
- /* leave si_xib_last_pindex and si_xib_next_bit */
-
- au_sphl_init(&sbinfo->si_aopen);
-
- sbinfo->si_rdcache = msecs_to_jiffies(AUFS_RDCACHE_DEF * MSEC_PER_SEC);
- sbinfo->si_rdblk = AUFS_RDBLK_DEF;
- sbinfo->si_rdhash = AUFS_RDHASH_DEF;
- sbinfo->si_dirwh = AUFS_DIRWH_DEF;
-
- for (i = 0; i < AuPlink_NHASH; i++)
- au_sphl_init(sbinfo->si_plink + i);
- init_waitqueue_head(&sbinfo->si_plink_wq);
- spin_lock_init(&sbinfo->si_plink_maint_lock);
-
- au_sphl_init(&sbinfo->si_files);
-
- /* with getattr by default */
- sbinfo->si_iop_array = aufs_iop;
-
- /* leave other members for sysaufs and si_mnt. */
- sbinfo->si_sb = sb;
- sb->s_fs_info = sbinfo;
- si_pid_set(sb);
- return 0; /* success */
-
-out_br:
- kfree(sbinfo->si_branch);
-out_pidmap:
- kfree(sbinfo->au_si_pid.bitmap);
-out_sbinfo:
- kfree(sbinfo);
-out:
- return err;
-}
-
-int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr)
-{
- int err, sz;
- struct au_branch **brp;
-
- AuRwMustWriteLock(&sbinfo->si_rwsem);
-
- err = -ENOMEM;
- sz = sizeof(*brp) * (sbinfo->si_bend + 1);
- if (unlikely(!sz))
- sz = sizeof(*brp);
- brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS);
- if (brp) {
- sbinfo->si_branch = brp;
- err = 0;
- }
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-unsigned int au_sigen_inc(struct super_block *sb)
-{
- unsigned int gen;
- struct inode *inode;
-
- SiMustWriteLock(sb);
-
- gen = ++au_sbi(sb)->si_generation;
- au_update_digen(sb->s_root);
- inode = d_inode(sb->s_root);
- au_update_iigen(inode, /*half*/0);
- inode->i_version++;
- return gen;
-}
-
-aufs_bindex_t au_new_br_id(struct super_block *sb)
-{
- aufs_bindex_t br_id;
- int i;
- struct au_sbinfo *sbinfo;
-
- SiMustWriteLock(sb);
-
- sbinfo = au_sbi(sb);
- for (i = 0; i <= AUFS_BRANCH_MAX; i++) {
- br_id = ++sbinfo->si_last_br_id;
- AuDebugOn(br_id < 0);
- if (br_id && au_br_index(sb, br_id) < 0)
- return br_id;
- }
-
- return -1;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* it is ok that new 'nwt' tasks are appended while we are sleeping */
-int si_read_lock(struct super_block *sb, int flags)
-{
- int err;
-
- err = 0;
- if (au_ftest_lock(flags, FLUSH))
- au_nwt_flush(&au_sbi(sb)->si_nowait);
-
- si_noflush_read_lock(sb);
- err = au_plink_maint(sb, flags);
- if (unlikely(err))
- si_read_unlock(sb);
-
- return err;
-}
-
-int si_write_lock(struct super_block *sb, int flags)
-{
- int err;
-
- if (au_ftest_lock(flags, FLUSH))
- au_nwt_flush(&au_sbi(sb)->si_nowait);
-
- si_noflush_write_lock(sb);
- err = au_plink_maint(sb, flags);
- if (unlikely(err))
- si_write_unlock(sb);
-
- return err;
-}
-
-/* dentry and super_block lock. call at entry point */
-int aufs_read_lock(struct dentry *dentry, int flags)
-{
- int err;
- struct super_block *sb;
-
- sb = dentry->d_sb;
- err = si_read_lock(sb, flags);
- if (unlikely(err))
- goto out;
-
- if (au_ftest_lock(flags, DW))
- di_write_lock_child(dentry);
- else
- di_read_lock_child(dentry, flags);
-
- if (au_ftest_lock(flags, GEN)) {
- err = au_digen_test(dentry, au_sigen(sb));
- if (!au_opt_test(au_mntflags(sb), UDBA_NONE))
- AuDebugOn(!err && au_dbrange_test(dentry));
- else if (!err)
- err = au_dbrange_test(dentry);
- if (unlikely(err))
- aufs_read_unlock(dentry, flags);
- }
-
-out:
- return err;
-}
-
-void aufs_read_unlock(struct dentry *dentry, int flags)
-{
- if (au_ftest_lock(flags, DW))
- di_write_unlock(dentry);
- else
- di_read_unlock(dentry, flags);
- si_read_unlock(dentry->d_sb);
-}
-
-void aufs_write_lock(struct dentry *dentry)
-{
- si_write_lock(dentry->d_sb, AuLock_FLUSH | AuLock_NOPLMW);
- di_write_lock_child(dentry);
-}
-
-void aufs_write_unlock(struct dentry *dentry)
-{
- di_write_unlock(dentry);
- si_write_unlock(dentry->d_sb);
-}
-
-int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags)
-{
- int err;
- unsigned int sigen;
- struct super_block *sb;
-
- sb = d1->d_sb;
- err = si_read_lock(sb, flags);
- if (unlikely(err))
- goto out;
-
- di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIRS));
-
- if (au_ftest_lock(flags, GEN)) {
- sigen = au_sigen(sb);
- err = au_digen_test(d1, sigen);
- AuDebugOn(!err && au_dbrange_test(d1));
- if (!err) {
- err = au_digen_test(d2, sigen);
- AuDebugOn(!err && au_dbrange_test(d2));
- }
- if (unlikely(err))
- aufs_read_and_write_unlock2(d1, d2);
- }
-
-out:
- return err;
-}
-
-void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2)
-{
- di_write_unlock2(d1, d2);
- si_read_unlock(d1->d_sb);
-}
-
-/* ---------------------------------------------------------------------- */
-
-int si_pid_test_slow(struct super_block *sb)
-{
- void *p;
-
- rcu_read_lock();
- p = radix_tree_lookup(&au_sbi(sb)->au_si_pid.tree, current->pid);
- rcu_read_unlock();
-
- return (long)!!p;
-}
-
-void si_pid_set_slow(struct super_block *sb)
-{
- int err;
- struct au_sbinfo *sbinfo;
-
- AuDebugOn(si_pid_test_slow(sb));
-
- sbinfo = au_sbi(sb);
- err = radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
- AuDebugOn(err);
- spin_lock(&sbinfo->au_si_pid.tree_lock);
- err = radix_tree_insert(&sbinfo->au_si_pid.tree, current->pid,
- /*any valid ptr*/sb);
- spin_unlock(&sbinfo->au_si_pid.tree_lock);
- AuDebugOn(err);
- radix_tree_preload_end();
-}
-
-void si_pid_clr_slow(struct super_block *sb)
-{
- void *p;
- struct au_sbinfo *sbinfo;
-
- AuDebugOn(!si_pid_test_slow(sb));
-
- sbinfo = au_sbi(sb);
- spin_lock(&sbinfo->au_si_pid.tree_lock);
- p = radix_tree_delete(&sbinfo->au_si_pid.tree, current->pid);
- spin_unlock(&sbinfo->au_si_pid.tree_lock);
-}
diff --git a/fs/aufs/spl.h b/fs/aufs/spl.h
deleted file mode 100644
index f9b528826..000000000
--- a/fs/aufs/spl.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * simple list protected by a spinlock
- */
-
-#ifndef __AUFS_SPL_H__
-#define __AUFS_SPL_H__
-
-#ifdef __KERNEL__
-
-struct au_splhead {
- spinlock_t spin;
- struct list_head head;
-};
-
-static inline void au_spl_init(struct au_splhead *spl)
-{
- spin_lock_init(&spl->spin);
- INIT_LIST_HEAD(&spl->head);
-}
-
-static inline void au_spl_add(struct list_head *list, struct au_splhead *spl)
-{
- spin_lock(&spl->spin);
- list_add(list, &spl->head);
- spin_unlock(&spl->spin);
-}
-
-static inline void au_spl_del(struct list_head *list, struct au_splhead *spl)
-{
- spin_lock(&spl->spin);
- list_del(list);
- spin_unlock(&spl->spin);
-}
-
-static inline void au_spl_del_rcu(struct list_head *list,
- struct au_splhead *spl)
-{
- spin_lock(&spl->spin);
- list_del_rcu(list);
- spin_unlock(&spl->spin);
-}
-
-/* ---------------------------------------------------------------------- */
-
-struct au_sphlhead {
- spinlock_t spin;
- struct hlist_head head;
-};
-
-static inline void au_sphl_init(struct au_sphlhead *sphl)
-{
- spin_lock_init(&sphl->spin);
- INIT_HLIST_HEAD(&sphl->head);
-}
-
-static inline void au_sphl_add(struct hlist_node *hlist,
- struct au_sphlhead *sphl)
-{
- spin_lock(&sphl->spin);
- hlist_add_head(hlist, &sphl->head);
- spin_unlock(&sphl->spin);
-}
-
-static inline void au_sphl_del(struct hlist_node *hlist,
- struct au_sphlhead *sphl)
-{
- spin_lock(&sphl->spin);
- hlist_del(hlist);
- spin_unlock(&sphl->spin);
-}
-
-static inline void au_sphl_del_rcu(struct hlist_node *hlist,
- struct au_sphlhead *sphl)
-{
- spin_lock(&sphl->spin);
- hlist_del_rcu(hlist);
- spin_unlock(&sphl->spin);
-}
-
-static inline unsigned long au_sphl_count(struct au_sphlhead *sphl)
-{
- unsigned long cnt;
- struct hlist_node *pos;
-
- cnt = 0;
- spin_lock(&sphl->spin);
- hlist_for_each(pos, &sphl->head)
- cnt++;
- spin_unlock(&sphl->spin);
- return cnt;
-}
-
-#endif /* __KERNEL__ */
-#endif /* __AUFS_SPL_H__ */
diff --git a/fs/aufs/super.c b/fs/aufs/super.c
deleted file mode 100644
index b41d78913..000000000
--- a/fs/aufs/super.c
+++ /dev/null
@@ -1,1026 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * mount and super_block operations
- */
-
-#include <linux/mm.h>
-#include <linux/seq_file.h>
-#include <linux/statfs.h>
-#include <linux/vmalloc.h>
-#include "aufs.h"
-
-/*
- * super_operations
- */
-static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
-{
- struct au_icntnr *c;
-
- c = au_cache_alloc_icntnr();
- if (c) {
- au_icntnr_init(c);
- c->vfs_inode.i_version = 1; /* sigen(sb); */
- c->iinfo.ii_hinode = NULL;
- return &c->vfs_inode;
- }
- return NULL;
-}
-
-static void aufs_destroy_inode_cb(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
-
- INIT_HLIST_HEAD(&inode->i_dentry);
- au_cache_free_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
-}
-
-static void aufs_destroy_inode(struct inode *inode)
-{
- au_iinfo_fin(inode);
- call_rcu(&inode->i_rcu, aufs_destroy_inode_cb);
-}
-
-struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
-{
- struct inode *inode;
- int err;
-
- inode = iget_locked(sb, ino);
- if (unlikely(!inode)) {
- inode = ERR_PTR(-ENOMEM);
- goto out;
- }
- if (!(inode->i_state & I_NEW))
- goto out;
-
- err = au_xigen_new(inode);
- if (!err)
- err = au_iinfo_init(inode);
- if (!err)
- inode->i_version++;
- else {
- iget_failed(inode);
- inode = ERR_PTR(err);
- }
-
-out:
- /* never return NULL */
- AuDebugOn(!inode);
- AuTraceErrPtr(inode);
- return inode;
-}
-
-/* lock free root dinfo */
-static int au_show_brs(struct seq_file *seq, struct super_block *sb)
-{
- int err;
- aufs_bindex_t bindex, bend;
- struct path path;
- struct au_hdentry *hdp;
- struct au_branch *br;
- au_br_perm_str_t perm;
-
- err = 0;
- bend = au_sbend(sb);
- hdp = au_di(sb->s_root)->di_hdentry;
- for (bindex = 0; !err && bindex <= bend; bindex++) {
- br = au_sbr(sb, bindex);
- path.mnt = au_br_mnt(br);
- path.dentry = hdp[bindex].hd_dentry;
- err = au_seq_path(seq, &path);
- if (!err) {
- au_optstr_br_perm(&perm, br->br_perm);
- seq_printf(seq, "=%s", perm.a);
- if (bindex != bend)
- seq_putc(seq, ':');
- }
- }
- if (unlikely(err || seq_has_overflowed(seq)))
- err = -E2BIG;
-
- return err;
-}
-
-static void au_show_wbr_create(struct seq_file *m, int v,
- struct au_sbinfo *sbinfo)
-{
- const char *pat;
-
- AuRwMustAnyLock(&sbinfo->si_rwsem);
-
- seq_puts(m, ",create=");
- pat = au_optstr_wbr_create(v);
- switch (v) {
- case AuWbrCreate_TDP:
- case AuWbrCreate_RR:
- case AuWbrCreate_MFS:
- case AuWbrCreate_PMFS:
- seq_puts(m, pat);
- break;
- case AuWbrCreate_MFSV:
- seq_printf(m, /*pat*/"mfs:%lu",
- jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
- / MSEC_PER_SEC);
- break;
- case AuWbrCreate_PMFSV:
- seq_printf(m, /*pat*/"pmfs:%lu",
- jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
- / MSEC_PER_SEC);
- break;
- case AuWbrCreate_MFSRR:
- seq_printf(m, /*pat*/"mfsrr:%llu",
- sbinfo->si_wbr_mfs.mfsrr_watermark);
- break;
- case AuWbrCreate_MFSRRV:
- seq_printf(m, /*pat*/"mfsrr:%llu:%lu",
- sbinfo->si_wbr_mfs.mfsrr_watermark,
- jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
- / MSEC_PER_SEC);
- break;
- case AuWbrCreate_PMFSRR:
- seq_printf(m, /*pat*/"pmfsrr:%llu",
- sbinfo->si_wbr_mfs.mfsrr_watermark);
- break;
- case AuWbrCreate_PMFSRRV:
- seq_printf(m, /*pat*/"pmfsrr:%llu:%lu",
- sbinfo->si_wbr_mfs.mfsrr_watermark,
- jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
- / MSEC_PER_SEC);
- break;
- }
-}
-
-static int au_show_xino(struct seq_file *seq, struct super_block *sb)
-{
-#ifdef CONFIG_SYSFS
- return 0;
-#else
- int err;
- const int len = sizeof(AUFS_XINO_FNAME) - 1;
- aufs_bindex_t bindex, brid;
- struct qstr *name;
- struct file *f;
- struct dentry *d, *h_root;
- struct au_hdentry *hdp;
-
- AuRwMustAnyLock(&sbinfo->si_rwsem);
-
- err = 0;
- f = au_sbi(sb)->si_xib;
- if (!f)
- goto out;
-
- /* stop printing the default xino path on the first writable branch */
- h_root = NULL;
- brid = au_xino_brid(sb);
- if (brid >= 0) {
- bindex = au_br_index(sb, brid);
- hdp = au_di(sb->s_root)->di_hdentry;
- h_root = hdp[0 + bindex].hd_dentry;
- }
- d = f->f_path.dentry;
- name = &d->d_name;
- /* safe ->d_parent because the file is unlinked */
- if (d->d_parent == h_root
- && name->len == len
- && !memcmp(name->name, AUFS_XINO_FNAME, len))
- goto out;
-
- seq_puts(seq, ",xino=");
- err = au_xino_path(seq, f);
-
-out:
- return err;
-#endif
-}
-
-/* seq_file will re-call me in case of too long string */
-static int aufs_show_options(struct seq_file *m, struct dentry *dentry)
-{
- int err;
- unsigned int mnt_flags, v;
- struct super_block *sb;
- struct au_sbinfo *sbinfo;
-
-#define AuBool(name, str) do { \
- v = au_opt_test(mnt_flags, name); \
- if (v != au_opt_test(AuOpt_Def, name)) \
- seq_printf(m, ",%s" #str, v ? "" : "no"); \
-} while (0)
-
-#define AuStr(name, str) do { \
- v = mnt_flags & AuOptMask_##name; \
- if (v != (AuOpt_Def & AuOptMask_##name)) \
- seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \
-} while (0)
-
-#define AuUInt(name, str, val) do { \
- if (val != AUFS_##name##_DEF) \
- seq_printf(m, "," #str "=%u", val); \
-} while (0)
-
- sb = dentry->d_sb;
- if (sb->s_flags & MS_POSIXACL)
- seq_puts(m, ",acl");
-
- /* lock free root dinfo */
- si_noflush_read_lock(sb);
- sbinfo = au_sbi(sb);
- seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo));
-
- mnt_flags = au_mntflags(sb);
- if (au_opt_test(mnt_flags, XINO)) {
- err = au_show_xino(m, sb);
- if (unlikely(err))
- goto out;
- } else
- seq_puts(m, ",noxino");
-
- AuBool(TRUNC_XINO, trunc_xino);
- AuStr(UDBA, udba);
- AuBool(SHWH, shwh);
- AuBool(PLINK, plink);
- AuBool(DIO, dio);
- AuBool(DIRPERM1, dirperm1);
-
- v = sbinfo->si_wbr_create;
- if (v != AuWbrCreate_Def)
- au_show_wbr_create(m, v, sbinfo);
-
- v = sbinfo->si_wbr_copyup;
- if (v != AuWbrCopyup_Def)
- seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v));
-
- v = au_opt_test(mnt_flags, ALWAYS_DIROPQ);
- if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ))
- seq_printf(m, ",diropq=%c", v ? 'a' : 'w');
-
- AuUInt(DIRWH, dirwh, sbinfo->si_dirwh);
-
- v = jiffies_to_msecs(sbinfo->si_rdcache) / MSEC_PER_SEC;
- AuUInt(RDCACHE, rdcache, v);
-
- AuUInt(RDBLK, rdblk, sbinfo->si_rdblk);
- AuUInt(RDHASH, rdhash, sbinfo->si_rdhash);
-
- au_fhsm_show(m, sbinfo);
-
- AuBool(SUM, sum);
- /* AuBool(SUM_W, wsum); */
- AuBool(WARN_PERM, warn_perm);
- AuBool(VERBOSE, verbose);
-
-out:
- /* be sure to print "br:" last */
- if (!sysaufs_brs) {
- seq_puts(m, ",br:");
- au_show_brs(m, sb);
- }
- si_read_unlock(sb);
- return 0;
-
-#undef AuBool
-#undef AuStr
-#undef AuUInt
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* sum mode which returns the summation for statfs(2) */
-
-static u64 au_add_till_max(u64 a, u64 b)
-{
- u64 old;
-
- old = a;
- a += b;
- if (old <= a)
- return a;
- return ULLONG_MAX;
-}
-
-static u64 au_mul_till_max(u64 a, long mul)
-{
- u64 old;
-
- old = a;
- a *= mul;
- if (old <= a)
- return a;
- return ULLONG_MAX;
-}
-
-static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
-{
- int err;
- long bsize, factor;
- u64 blocks, bfree, bavail, files, ffree;
- aufs_bindex_t bend, bindex, i;
- unsigned char shared;
- struct path h_path;
- struct super_block *h_sb;
-
- err = 0;
- bsize = LONG_MAX;
- files = 0;
- ffree = 0;
- blocks = 0;
- bfree = 0;
- bavail = 0;
- bend = au_sbend(sb);
- for (bindex = 0; bindex <= bend; bindex++) {
- h_path.mnt = au_sbr_mnt(sb, bindex);
- h_sb = h_path.mnt->mnt_sb;
- shared = 0;
- for (i = 0; !shared && i < bindex; i++)
- shared = (au_sbr_sb(sb, i) == h_sb);
- if (shared)
- continue;
-
- /* sb->s_root for NFS is unreliable */
- h_path.dentry = h_path.mnt->mnt_root;
- err = vfs_statfs(&h_path, buf);
- if (unlikely(err))
- goto out;
-
- if (bsize > buf->f_bsize) {
- /*
- * we will reduce bsize, so we have to expand blocks
- * etc. to match them again
- */
- factor = (bsize / buf->f_bsize);
- blocks = au_mul_till_max(blocks, factor);
- bfree = au_mul_till_max(bfree, factor);
- bavail = au_mul_till_max(bavail, factor);
- bsize = buf->f_bsize;
- }
-
- factor = (buf->f_bsize / bsize);
- blocks = au_add_till_max(blocks,
- au_mul_till_max(buf->f_blocks, factor));
- bfree = au_add_till_max(bfree,
- au_mul_till_max(buf->f_bfree, factor));
- bavail = au_add_till_max(bavail,
- au_mul_till_max(buf->f_bavail, factor));
- files = au_add_till_max(files, buf->f_files);
- ffree = au_add_till_max(ffree, buf->f_ffree);
- }
-
- buf->f_bsize = bsize;
- buf->f_blocks = blocks;
- buf->f_bfree = bfree;
- buf->f_bavail = bavail;
- buf->f_files = files;
- buf->f_ffree = ffree;
- buf->f_frsize = 0;
-
-out:
- return err;
-}
-
-static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
-{
- int err;
- struct path h_path;
- struct super_block *sb;
-
- /* lock free root dinfo */
- sb = dentry->d_sb;
- si_noflush_read_lock(sb);
- if (!au_opt_test(au_mntflags(sb), SUM)) {
- /* sb->s_root for NFS is unreliable */
- h_path.mnt = au_sbr_mnt(sb, 0);
- h_path.dentry = h_path.mnt->mnt_root;
- err = vfs_statfs(&h_path, buf);
- } else
- err = au_statfs_sum(sb, buf);
- si_read_unlock(sb);
-
- if (!err) {
- buf->f_type = AUFS_SUPER_MAGIC;
- buf->f_namelen = AUFS_MAX_NAMELEN;
- memset(&buf->f_fsid, 0, sizeof(buf->f_fsid));
- }
- /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int aufs_sync_fs(struct super_block *sb, int wait)
-{
- int err, e;
- aufs_bindex_t bend, bindex;
- struct au_branch *br;
- struct super_block *h_sb;
-
- err = 0;
- si_noflush_read_lock(sb);
- bend = au_sbend(sb);
- for (bindex = 0; bindex <= bend; bindex++) {
- br = au_sbr(sb, bindex);
- if (!au_br_writable(br->br_perm))
- continue;
-
- h_sb = au_sbr_sb(sb, bindex);
- if (h_sb->s_op->sync_fs) {
- e = h_sb->s_op->sync_fs(h_sb, wait);
- if (unlikely(e && !err))
- err = e;
- /* go on even if an error happens */
- }
- }
- si_read_unlock(sb);
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* final actions when unmounting a file system */
-static void aufs_put_super(struct super_block *sb)
-{
- struct au_sbinfo *sbinfo;
-
- sbinfo = au_sbi(sb);
- if (!sbinfo)
- return;
-
- dbgaufs_si_fin(sbinfo);
- kobject_put(&sbinfo->si_kobj);
-}
-
-/* ---------------------------------------------------------------------- */
-
-void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb,
- struct super_block *sb, void *arg)
-{
- void *array;
- unsigned long long n, sz;
-
- array = NULL;
- n = 0;
- if (!*hint)
- goto out;
-
- if (*hint > ULLONG_MAX / sizeof(array)) {
- array = ERR_PTR(-EMFILE);
- pr_err("hint %llu\n", *hint);
- goto out;
- }
-
- sz = sizeof(array) * *hint;
- array = kzalloc(sz, GFP_NOFS);
- if (unlikely(!array))
- array = vzalloc(sz);
- if (unlikely(!array)) {
- array = ERR_PTR(-ENOMEM);
- goto out;
- }
-
- n = cb(sb, array, *hint, arg);
- AuDebugOn(n > *hint);
-
-out:
- *hint = n;
- return array;
-}
-
-static unsigned long long au_iarray_cb(struct super_block *sb, void *a,
- unsigned long long max __maybe_unused,
- void *arg)
-{
- unsigned long long n;
- struct inode **p, *inode;
- struct list_head *head;
-
- n = 0;
- p = a;
- head = arg;
- spin_lock(&sb->s_inode_list_lock);
- list_for_each_entry(inode, head, i_sb_list) {
- if (!is_bad_inode(inode)
- && au_ii(inode)->ii_bstart >= 0) {
- spin_lock(&inode->i_lock);
- if (atomic_read(&inode->i_count)) {
- au_igrab(inode);
- *p++ = inode;
- n++;
- AuDebugOn(n > max);
- }
- spin_unlock(&inode->i_lock);
- }
- }
- spin_unlock(&sb->s_inode_list_lock);
-
- return n;
-}
-
-struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max)
-{
- *max = atomic_long_read(&au_sbi(sb)->si_ninodes);
- return au_array_alloc(max, au_iarray_cb, sb, &sb->s_inodes);
-}
-
-void au_iarray_free(struct inode **a, unsigned long long max)
-{
- unsigned long long ull;
-
- for (ull = 0; ull < max; ull++)
- iput(a[ull]);
- kvfree(a);
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * refresh dentry and inode at remount time.
- */
-/* todo: consolidate with simple_reval_dpath() and au_reval_for_attr() */
-static int au_do_refresh(struct dentry *dentry, unsigned int dir_flags,
- struct dentry *parent)
-{
- int err;
-
- di_write_lock_child(dentry);
- di_read_lock_parent(parent, AuLock_IR);
- err = au_refresh_dentry(dentry, parent);
- if (!err && dir_flags)
- au_hn_reset(d_inode(dentry), dir_flags);
- di_read_unlock(parent, AuLock_IR);
- di_write_unlock(dentry);
-
- return err;
-}
-
-static int au_do_refresh_d(struct dentry *dentry, unsigned int sigen,
- struct au_sbinfo *sbinfo,
- const unsigned int dir_flags, unsigned int do_idop)
-{
- int err;
- struct dentry *parent;
-
- err = 0;
- parent = dget_parent(dentry);
- if (!au_digen_test(parent, sigen) && au_digen_test(dentry, sigen)) {
- if (d_really_is_positive(dentry)) {
- if (!d_is_dir(dentry))
- err = au_do_refresh(dentry, /*dir_flags*/0,
- parent);
- else {
- err = au_do_refresh(dentry, dir_flags, parent);
- if (unlikely(err))
- au_fset_si(sbinfo, FAILED_REFRESH_DIR);
- }
- } else
- err = au_do_refresh(dentry, /*dir_flags*/0, parent);
- AuDbgDentry(dentry);
- }
- dput(parent);
-
- if (!err) {
- if (do_idop)
- au_refresh_dop(dentry, /*force_reval*/0);
- } else
- au_refresh_dop(dentry, /*force_reval*/1);
-
- AuTraceErr(err);
- return err;
-}
-
-static int au_refresh_d(struct super_block *sb, unsigned int do_idop)
-{
- int err, i, j, ndentry, e;
- unsigned int sigen;
- struct au_dcsub_pages dpages;
- struct au_dpage *dpage;
- struct dentry **dentries, *d;
- struct au_sbinfo *sbinfo;
- struct dentry *root = sb->s_root;
- const unsigned int dir_flags = au_hi_flags(d_inode(root), /*isdir*/1);
-
- if (do_idop)
- au_refresh_dop(root, /*force_reval*/0);
-
- err = au_dpages_init(&dpages, GFP_NOFS);
- if (unlikely(err))
- goto out;
- err = au_dcsub_pages(&dpages, root, NULL, NULL);
- if (unlikely(err))
- goto out_dpages;
-
- sigen = au_sigen(sb);
- sbinfo = au_sbi(sb);
- for (i = 0; i < dpages.ndpage; i++) {
- dpage = dpages.dpages + i;
- dentries = dpage->dentries;
- ndentry = dpage->ndentry;
- for (j = 0; j < ndentry; j++) {
- d = dentries[j];
- e = au_do_refresh_d(d, sigen, sbinfo, dir_flags,
- do_idop);
- if (unlikely(e && !err))
- err = e;
- /* go on even err */
- }
- }
-
-out_dpages:
- au_dpages_free(&dpages);
-out:
- return err;
-}
-
-static int au_refresh_i(struct super_block *sb, unsigned int do_idop)
-{
- int err, e;
- unsigned int sigen;
- unsigned long long max, ull;
- struct inode *inode, **array;
-
- array = au_iarray_alloc(sb, &max);
- err = PTR_ERR(array);
- if (IS_ERR(array))
- goto out;
-
- err = 0;
- sigen = au_sigen(sb);
- for (ull = 0; ull < max; ull++) {
- inode = array[ull];
- if (unlikely(!inode))
- break;
-
- e = 0;
- ii_write_lock_child(inode);
- if (au_iigen(inode, NULL) != sigen) {
- e = au_refresh_hinode_self(inode);
- if (unlikely(e)) {
- au_refresh_iop(inode, /*force_getattr*/1);
- pr_err("error %d, i%lu\n", e, inode->i_ino);
- if (!err)
- err = e;
- /* go on even if err */
- }
- }
- if (!e && do_idop)
- au_refresh_iop(inode, /*force_getattr*/0);
- ii_write_unlock(inode);
- }
-
- au_iarray_free(array, max);
-
-out:
- return err;
-}
-
-static void au_remount_refresh(struct super_block *sb, unsigned int do_idop)
-{
- int err, e;
- unsigned int udba;
- aufs_bindex_t bindex, bend;
- struct dentry *root;
- struct inode *inode;
- struct au_branch *br;
- struct au_sbinfo *sbi;
-
- au_sigen_inc(sb);
- sbi = au_sbi(sb);
- au_fclr_si(sbi, FAILED_REFRESH_DIR);
-
- root = sb->s_root;
- DiMustNoWaiters(root);
- inode = d_inode(root);
- IiMustNoWaiters(inode);
-
- udba = au_opt_udba(sb);
- bend = au_sbend(sb);
- for (bindex = 0; bindex <= bend; bindex++) {
- br = au_sbr(sb, bindex);
- err = au_hnotify_reset_br(udba, br, br->br_perm);
- if (unlikely(err))
- AuIOErr("hnotify failed on br %d, %d, ignored\n",
- bindex, err);
- /* go on even if err */
- }
- au_hn_reset(inode, au_hi_flags(inode, /*isdir*/1));
-
- if (do_idop) {
- if (au_ftest_si(sbi, NO_DREVAL)) {
- AuDebugOn(sb->s_d_op == &aufs_dop_noreval);
- sb->s_d_op = &aufs_dop_noreval;
- AuDebugOn(sbi->si_iop_array == aufs_iop_nogetattr);
- sbi->si_iop_array = aufs_iop_nogetattr;
- } else {
- AuDebugOn(sb->s_d_op == &aufs_dop);
- sb->s_d_op = &aufs_dop;
- AuDebugOn(sbi->si_iop_array == aufs_iop);
- sbi->si_iop_array = aufs_iop;
- }
- pr_info("reset to %pf and %pf\n",
- sb->s_d_op, sbi->si_iop_array);
- }
-
- di_write_unlock(root);
- err = au_refresh_d(sb, do_idop);
- e = au_refresh_i(sb, do_idop);
- if (unlikely(e && !err))
- err = e;
- /* aufs_write_lock() calls ..._child() */
- di_write_lock_child(root);
-
- au_cpup_attr_all(inode, /*force*/1);
-
- if (unlikely(err))
- AuIOErr("refresh failed, ignored, %d\n", err);
-}
-
-/* stop extra interpretation of errno in mount(8), and strange error messages */
-static int cvt_err(int err)
-{
- AuTraceErr(err);
-
- switch (err) {
- case -ENOENT:
- case -ENOTDIR:
- case -EEXIST:
- case -EIO:
- err = -EINVAL;
- }
- return err;
-}
-
-static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
-{
- int err, do_dx;
- unsigned int mntflags;
- struct au_opts opts = {
- .opt = NULL
- };
- struct dentry *root;
- struct inode *inode;
- struct au_sbinfo *sbinfo;
-
- err = 0;
- root = sb->s_root;
- if (!data || !*data) {
- err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
- if (!err) {
- di_write_lock_child(root);
- err = au_opts_verify(sb, *flags, /*pending*/0);
- aufs_write_unlock(root);
- }
- goto out;
- }
-
- err = -ENOMEM;
- opts.opt = (void *)__get_free_page(GFP_NOFS);
- if (unlikely(!opts.opt))
- goto out;
- opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
- opts.flags = AuOpts_REMOUNT;
- opts.sb_flags = *flags;
-
- /* parse it before aufs lock */
- err = au_opts_parse(sb, data, &opts);
- if (unlikely(err))
- goto out_opts;
-
- sbinfo = au_sbi(sb);
- inode = d_inode(root);
- mutex_lock(&inode->i_mutex);
- err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
- if (unlikely(err))
- goto out_mtx;
- di_write_lock_child(root);
-
- /* au_opts_remount() may return an error */
- err = au_opts_remount(sb, &opts);
- au_opts_free(&opts);
-
- if (au_ftest_opts(opts.flags, REFRESH))
- au_remount_refresh(sb, au_ftest_opts(opts.flags, REFRESH_IDOP));
-
- if (au_ftest_opts(opts.flags, REFRESH_DYAOP)) {
- mntflags = au_mntflags(sb);
- do_dx = !!au_opt_test(mntflags, DIO);
- au_dy_arefresh(do_dx);
- }
-
- au_fhsm_wrote_all(sb, /*force*/1); /* ?? */
- aufs_write_unlock(root);
-
-out_mtx:
- mutex_unlock(&inode->i_mutex);
-out_opts:
- free_page((unsigned long)opts.opt);
-out:
- err = cvt_err(err);
- AuTraceErr(err);
- return err;
-}
-
-static const struct super_operations aufs_sop = {
- .alloc_inode = aufs_alloc_inode,
- .destroy_inode = aufs_destroy_inode,
- /* always deleting, no clearing */
- .drop_inode = generic_delete_inode,
- .show_options = aufs_show_options,
- .statfs = aufs_statfs,
- .put_super = aufs_put_super,
- .sync_fs = aufs_sync_fs,
- .remount_fs = aufs_remount_fs
-};
-
-/* ---------------------------------------------------------------------- */
-
-static int alloc_root(struct super_block *sb)
-{
- int err;
- struct inode *inode;
- struct dentry *root;
-
- err = -ENOMEM;
- inode = au_iget_locked(sb, AUFS_ROOT_INO);
- err = PTR_ERR(inode);
- if (IS_ERR(inode))
- goto out;
-
- inode->i_op = aufs_iop + AuIop_DIR; /* with getattr by default */
- inode->i_fop = &aufs_dir_fop;
- inode->i_mode = S_IFDIR;
- set_nlink(inode, 2);
- unlock_new_inode(inode);
-
- root = d_make_root(inode);
- if (unlikely(!root))
- goto out;
- err = PTR_ERR(root);
- if (IS_ERR(root))
- goto out;
-
- err = au_di_init(root);
- if (!err) {
- sb->s_root = root;
- return 0; /* success */
- }
- dput(root);
-
-out:
- return err;
-}
-
-static int aufs_fill_super(struct super_block *sb, void *raw_data,
- int silent __maybe_unused)
-{
- int err;
- struct au_opts opts = {
- .opt = NULL
- };
- struct au_sbinfo *sbinfo;
- struct dentry *root;
- struct inode *inode;
- char *arg = raw_data;
-
- if (unlikely(!arg || !*arg)) {
- err = -EINVAL;
- pr_err("no arg\n");
- goto out;
- }
-
- err = -ENOMEM;
- opts.opt = (void *)__get_free_page(GFP_NOFS);
- if (unlikely(!opts.opt))
- goto out;
- opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
- opts.sb_flags = sb->s_flags;
-
- err = au_si_alloc(sb);
- if (unlikely(err))
- goto out_opts;
- sbinfo = au_sbi(sb);
-
- /* all timestamps always follow the ones on the branch */
- sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
- sb->s_op = &aufs_sop;
- sb->s_d_op = &aufs_dop;
- sb->s_magic = AUFS_SUPER_MAGIC;
- sb->s_maxbytes = 0;
- sb->s_stack_depth = 1;
- au_export_init(sb);
- /* au_xattr_init(sb); */
-
- err = alloc_root(sb);
- if (unlikely(err)) {
- si_write_unlock(sb);
- goto out_info;
- }
- root = sb->s_root;
- inode = d_inode(root);
-
- /*
- * actually we can parse options regardless aufs lock here.
- * but at remount time, parsing must be done before aufs lock.
- * so we follow the same rule.
- */
- ii_write_lock_parent(inode);
- aufs_write_unlock(root);
- err = au_opts_parse(sb, arg, &opts);
- if (unlikely(err))
- goto out_root;
-
- /* lock vfs_inode first, then aufs. */
- mutex_lock(&inode->i_mutex);
- aufs_write_lock(root);
- err = au_opts_mount(sb, &opts);
- au_opts_free(&opts);
- if (!err && au_ftest_si(sbinfo, NO_DREVAL)) {
- sb->s_d_op = &aufs_dop_noreval;
- pr_info("%pf\n", sb->s_d_op);
- au_refresh_dop(root, /*force_reval*/0);
- sbinfo->si_iop_array = aufs_iop_nogetattr;
- au_refresh_iop(inode, /*force_getattr*/0);
- }
- aufs_write_unlock(root);
- mutex_unlock(&inode->i_mutex);
- if (!err)
- goto out_opts; /* success */
-
-out_root:
- dput(root);
- sb->s_root = NULL;
-out_info:
- dbgaufs_si_fin(sbinfo);
- kobject_put(&sbinfo->si_kobj);
- sb->s_fs_info = NULL;
-out_opts:
- free_page((unsigned long)opts.opt);
-out:
- AuTraceErr(err);
- err = cvt_err(err);
- AuTraceErr(err);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static struct dentry *aufs_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name __maybe_unused,
- void *raw_data)
-{
- struct dentry *root;
- struct super_block *sb;
-
- /* all timestamps always follow the ones on the branch */
- /* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */
- root = mount_nodev(fs_type, flags, raw_data, aufs_fill_super);
- if (IS_ERR(root))
- goto out;
-
- sb = root->d_sb;
- si_write_lock(sb, !AuLock_FLUSH);
- sysaufs_brs_add(sb, 0);
- si_write_unlock(sb);
- au_sbilist_add(sb);
-
-out:
- return root;
-}
-
-static void aufs_kill_sb(struct super_block *sb)
-{
- struct au_sbinfo *sbinfo;
-
- sbinfo = au_sbi(sb);
- if (sbinfo) {
- au_sbilist_del(sb);
- aufs_write_lock(sb->s_root);
- au_fhsm_fin(sb);
- if (sbinfo->si_wbr_create_ops->fin)
- sbinfo->si_wbr_create_ops->fin(sb);
- if (au_opt_test(sbinfo->si_mntflags, UDBA_HNOTIFY)) {
- au_opt_set_udba(sbinfo->si_mntflags, UDBA_NONE);
- au_remount_refresh(sb, /*do_idop*/0);
- }
- if (au_opt_test(sbinfo->si_mntflags, PLINK))
- au_plink_put(sb, /*verbose*/1);
- au_xino_clr(sb);
- sbinfo->si_sb = NULL;
- aufs_write_unlock(sb->s_root);
- au_nwt_flush(&sbinfo->si_nowait);
- }
- kill_anon_super(sb);
-}
-
-struct file_system_type aufs_fs_type = {
- .name = AUFS_FSTYPE,
- /* a race between rename and others */
- .fs_flags = FS_RENAME_DOES_D_MOVE,
- .mount = aufs_mount,
- .kill_sb = aufs_kill_sb,
- /* no need to __module_get() and module_put(). */
- .owner = THIS_MODULE,
-};
diff --git a/fs/aufs/super.h b/fs/aufs/super.h
deleted file mode 100644
index 2761df917..000000000
--- a/fs/aufs/super.h
+++ /dev/null
@@ -1,628 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * super_block operations
- */
-
-#ifndef __AUFS_SUPER_H__
-#define __AUFS_SUPER_H__
-
-#ifdef __KERNEL__
-
-#include <linux/fs.h>
-#include <linux/kobject.h>
-#include "rwsem.h"
-#include "spl.h"
-#include "wkq.h"
-
-/* policies to select one among multiple writable branches */
-struct au_wbr_copyup_operations {
- int (*copyup)(struct dentry *dentry);
-};
-
-#define AuWbr_DIR 1 /* target is a dir */
-#define AuWbr_PARENT (1 << 1) /* always require a parent */
-
-#define au_ftest_wbr(flags, name) ((flags) & AuWbr_##name)
-#define au_fset_wbr(flags, name) { (flags) |= AuWbr_##name; }
-#define au_fclr_wbr(flags, name) { (flags) &= ~AuWbr_##name; }
-
-struct au_wbr_create_operations {
- int (*create)(struct dentry *dentry, unsigned int flags);
- int (*init)(struct super_block *sb);
- int (*fin)(struct super_block *sb);
-};
-
-struct au_wbr_mfs {
- struct mutex mfs_lock; /* protect this structure */
- unsigned long mfs_jiffy;
- unsigned long mfs_expire;
- aufs_bindex_t mfs_bindex;
-
- unsigned long long mfsrr_bytes;
- unsigned long long mfsrr_watermark;
-};
-
-struct pseudo_link {
- union {
- struct hlist_node hlist;
- struct rcu_head rcu;
- };
- struct inode *inode;
-};
-
-#define AuPlink_NHASH 100
-static inline int au_plink_hash(ino_t ino)
-{
- return ino % AuPlink_NHASH;
-}
-
-/* File-based Hierarchical Storage Management */
-struct au_fhsm {
-#ifdef CONFIG_AUFS_FHSM
- /* allow only one process who can receive the notification */
- spinlock_t fhsm_spin;
- pid_t fhsm_pid;
- wait_queue_head_t fhsm_wqh;
- atomic_t fhsm_readable;
-
- /* these are protected by si_rwsem */
- unsigned long fhsm_expire;
- aufs_bindex_t fhsm_bottom;
-#endif
-};
-
-struct au_branch;
-struct au_sbinfo {
- /* nowait tasks in the system-wide workqueue */
- struct au_nowait_tasks si_nowait;
-
- /*
- * tried sb->s_umount, but failed due to the dependecy between i_mutex.
- * rwsem for au_sbinfo is necessary.
- */
- struct au_rwsem si_rwsem;
-
- /* prevent recursive locking in deleting inode */
- struct {
- unsigned long *bitmap;
- spinlock_t tree_lock;
- struct radix_tree_root tree;
- } au_si_pid;
-
- /*
- * dirty approach to protect sb->sb_inodes and ->s_files (gone) from
- * remount.
- */
- atomic_long_t si_ninodes, si_nfiles;
-
- /* branch management */
- unsigned int si_generation;
-
- /* see AuSi_ flags */
- unsigned char au_si_status;
-
- aufs_bindex_t si_bend;
-
- /* dirty trick to keep br_id plus */
- unsigned int si_last_br_id :
- sizeof(aufs_bindex_t) * BITS_PER_BYTE - 1;
- struct au_branch **si_branch;
-
- /* policy to select a writable branch */
- unsigned char si_wbr_copyup;
- unsigned char si_wbr_create;
- struct au_wbr_copyup_operations *si_wbr_copyup_ops;
- struct au_wbr_create_operations *si_wbr_create_ops;
-
- /* round robin */
- atomic_t si_wbr_rr_next;
-
- /* most free space */
- struct au_wbr_mfs si_wbr_mfs;
-
- /* File-based Hierarchical Storage Management */
- struct au_fhsm si_fhsm;
-
- /* mount flags */
- /* include/asm-ia64/siginfo.h defines a macro named si_flags */
- unsigned int si_mntflags;
-
- /* symlink to follow_link() and put_link() */
- struct au_sphlhead si_symlink;
-
- /* external inode number (bitmap and translation table) */
- vfs_readf_t si_xread;
- vfs_writef_t si_xwrite;
- struct file *si_xib;
- struct mutex si_xib_mtx; /* protect xib members */
- unsigned long *si_xib_buf;
- unsigned long si_xib_last_pindex;
- int si_xib_next_bit;
- aufs_bindex_t si_xino_brid;
- unsigned long si_xino_jiffy;
- unsigned long si_xino_expire;
- /* reserved for future use */
- /* unsigned long long si_xib_limit; */ /* Max xib file size */
-
-#ifdef CONFIG_AUFS_EXPORT
- /* i_generation */
- struct file *si_xigen;
- atomic_t si_xigen_next;
-#endif
-
- /* dirty trick to suppoer atomic_open */
- struct au_sphlhead si_aopen;
-
- /* vdir parameters */
- unsigned long si_rdcache; /* max cache time in jiffies */
- unsigned int si_rdblk; /* deblk size */
- unsigned int si_rdhash; /* hash size */
-
- /*
- * If the number of whiteouts are larger than si_dirwh, leave all of
- * them after au_whtmp_ren to reduce the cost of rmdir(2).
- * future fsck.aufs or kernel thread will remove them later.
- * Otherwise, remove all whiteouts and the dir in rmdir(2).
- */
- unsigned int si_dirwh;
-
- /* pseudo_link list */
- struct au_sphlhead si_plink[AuPlink_NHASH];
- wait_queue_head_t si_plink_wq;
- spinlock_t si_plink_maint_lock;
- pid_t si_plink_maint_pid;
-
- /* file list */
- struct au_sphlhead si_files;
-
- /* with/without getattr, brother of sb->s_d_op */
- struct inode_operations *si_iop_array;
-
- /*
- * sysfs and lifetime management.
- * this is not a small structure and it may be a waste of memory in case
- * of sysfs is disabled, particulary when many aufs-es are mounted.
- * but using sysfs is majority.
- */
- struct kobject si_kobj;
-#ifdef CONFIG_DEBUG_FS
- struct dentry *si_dbgaufs;
- struct dentry *si_dbgaufs_plink;
- struct dentry *si_dbgaufs_xib;
-#ifdef CONFIG_AUFS_EXPORT
- struct dentry *si_dbgaufs_xigen;
-#endif
-#endif
-
-#ifdef CONFIG_AUFS_SBILIST
- struct list_head si_list;
-#endif
-
- /* dirty, necessary for unmounting, sysfs and sysrq */
- struct super_block *si_sb;
-};
-
-/* sbinfo status flags */
-/*
- * set true when refresh_dirs() failed at remount time.
- * then try refreshing dirs at access time again.
- * if it is false, refreshing dirs at access time is unnecesary
- */
-#define AuSi_FAILED_REFRESH_DIR 1
-#define AuSi_FHSM (1 << 1) /* fhsm is active now */
-#define AuSi_NO_DREVAL (1 << 2) /* disable all d_revalidate */
-
-#ifndef CONFIG_AUFS_FHSM
-#undef AuSi_FHSM
-#define AuSi_FHSM 0
-#endif
-
-static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi,
- unsigned int flag)
-{
- AuRwMustAnyLock(&sbi->si_rwsem);
- return sbi->au_si_status & flag;
-}
-#define au_ftest_si(sbinfo, name) au_do_ftest_si(sbinfo, AuSi_##name)
-#define au_fset_si(sbinfo, name) do { \
- AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
- (sbinfo)->au_si_status |= AuSi_##name; \
-} while (0)
-#define au_fclr_si(sbinfo, name) do { \
- AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
- (sbinfo)->au_si_status &= ~AuSi_##name; \
-} while (0)
-
-/* ---------------------------------------------------------------------- */
-
-/* policy to select one among writable branches */
-#define AuWbrCopyup(sbinfo, ...) \
- ((sbinfo)->si_wbr_copyup_ops->copyup(__VA_ARGS__))
-#define AuWbrCreate(sbinfo, ...) \
- ((sbinfo)->si_wbr_create_ops->create(__VA_ARGS__))
-
-/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */
-#define AuLock_DW 1 /* write-lock dentry */
-#define AuLock_IR (1 << 1) /* read-lock inode */
-#define AuLock_IW (1 << 2) /* write-lock inode */
-#define AuLock_FLUSH (1 << 3) /* wait for 'nowait' tasks */
-#define AuLock_DIRS (1 << 4) /* target is a pair of dirs */
-#define AuLock_NOPLM (1 << 5) /* return err in plm mode */
-#define AuLock_NOPLMW (1 << 6) /* wait for plm mode ends */
-#define AuLock_GEN (1 << 7) /* test digen/iigen */
-#define au_ftest_lock(flags, name) ((flags) & AuLock_##name)
-#define au_fset_lock(flags, name) \
- do { (flags) |= AuLock_##name; } while (0)
-#define au_fclr_lock(flags, name) \
- do { (flags) &= ~AuLock_##name; } while (0)
-
-/* ---------------------------------------------------------------------- */
-
-/* super.c */
-extern struct file_system_type aufs_fs_type;
-struct inode *au_iget_locked(struct super_block *sb, ino_t ino);
-typedef unsigned long long (*au_arraycb_t)(struct super_block *sb, void *array,
- unsigned long long max, void *arg);
-void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb,
- struct super_block *sb, void *arg);
-struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max);
-void au_iarray_free(struct inode **a, unsigned long long max);
-
-/* sbinfo.c */
-void au_si_free(struct kobject *kobj);
-int au_si_alloc(struct super_block *sb);
-int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr);
-
-unsigned int au_sigen_inc(struct super_block *sb);
-aufs_bindex_t au_new_br_id(struct super_block *sb);
-
-int si_read_lock(struct super_block *sb, int flags);
-int si_write_lock(struct super_block *sb, int flags);
-int aufs_read_lock(struct dentry *dentry, int flags);
-void aufs_read_unlock(struct dentry *dentry, int flags);
-void aufs_write_lock(struct dentry *dentry);
-void aufs_write_unlock(struct dentry *dentry);
-int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags);
-void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2);
-
-int si_pid_test_slow(struct super_block *sb);
-void si_pid_set_slow(struct super_block *sb);
-void si_pid_clr_slow(struct super_block *sb);
-
-/* wbr_policy.c */
-extern struct au_wbr_copyup_operations au_wbr_copyup_ops[];
-extern struct au_wbr_create_operations au_wbr_create_ops[];
-int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst);
-int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex);
-int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t bstart);
-
-/* mvdown.c */
-int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *arg);
-
-#ifdef CONFIG_AUFS_FHSM
-/* fhsm.c */
-
-static inline pid_t au_fhsm_pid(struct au_fhsm *fhsm)
-{
- pid_t pid;
-
- spin_lock(&fhsm->fhsm_spin);
- pid = fhsm->fhsm_pid;
- spin_unlock(&fhsm->fhsm_spin);
-
- return pid;
-}
-
-void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force);
-void au_fhsm_wrote_all(struct super_block *sb, int force);
-int au_fhsm_fd(struct super_block *sb, int oflags);
-int au_fhsm_br_alloc(struct au_branch *br);
-void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex);
-void au_fhsm_fin(struct super_block *sb);
-void au_fhsm_init(struct au_sbinfo *sbinfo);
-void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec);
-void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo);
-#else
-AuStubVoid(au_fhsm_wrote, struct super_block *sb, aufs_bindex_t bindex,
- int force)
-AuStubVoid(au_fhsm_wrote_all, struct super_block *sb, int force)
-AuStub(int, au_fhsm_fd, return -EOPNOTSUPP, struct super_block *sb, int oflags)
-AuStub(pid_t, au_fhsm_pid, return 0, struct au_fhsm *fhsm)
-AuStubInt0(au_fhsm_br_alloc, struct au_branch *br)
-AuStubVoid(au_fhsm_set_bottom, struct super_block *sb, aufs_bindex_t bindex)
-AuStubVoid(au_fhsm_fin, struct super_block *sb)
-AuStubVoid(au_fhsm_init, struct au_sbinfo *sbinfo)
-AuStubVoid(au_fhsm_set, struct au_sbinfo *sbinfo, unsigned int sec)
-AuStubVoid(au_fhsm_show, struct seq_file *seq, struct au_sbinfo *sbinfo)
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-static inline struct au_sbinfo *au_sbi(struct super_block *sb)
-{
- return sb->s_fs_info;
-}
-
-/* ---------------------------------------------------------------------- */
-
-#ifdef CONFIG_AUFS_EXPORT
-int au_test_nfsd(void);
-void au_export_init(struct super_block *sb);
-void au_xigen_inc(struct inode *inode);
-int au_xigen_new(struct inode *inode);
-int au_xigen_set(struct super_block *sb, struct file *base);
-void au_xigen_clr(struct super_block *sb);
-
-static inline int au_busy_or_stale(void)
-{
- if (!au_test_nfsd())
- return -EBUSY;
- return -ESTALE;
-}
-#else
-AuStubInt0(au_test_nfsd, void)
-AuStubVoid(au_export_init, struct super_block *sb)
-AuStubVoid(au_xigen_inc, struct inode *inode)
-AuStubInt0(au_xigen_new, struct inode *inode)
-AuStubInt0(au_xigen_set, struct super_block *sb, struct file *base)
-AuStubVoid(au_xigen_clr, struct super_block *sb)
-AuStub(int, au_busy_or_stale, return -EBUSY, void)
-#endif /* CONFIG_AUFS_EXPORT */
-
-/* ---------------------------------------------------------------------- */
-
-#ifdef CONFIG_AUFS_SBILIST
-/* module.c */
-extern struct au_splhead au_sbilist;
-
-static inline void au_sbilist_init(void)
-{
- au_spl_init(&au_sbilist);
-}
-
-static inline void au_sbilist_add(struct super_block *sb)
-{
- au_spl_add(&au_sbi(sb)->si_list, &au_sbilist);
-}
-
-static inline void au_sbilist_del(struct super_block *sb)
-{
- au_spl_del(&au_sbi(sb)->si_list, &au_sbilist);
-}
-
-#ifdef CONFIG_AUFS_MAGIC_SYSRQ
-static inline void au_sbilist_lock(void)
-{
- spin_lock(&au_sbilist.spin);
-}
-
-static inline void au_sbilist_unlock(void)
-{
- spin_unlock(&au_sbilist.spin);
-}
-#define AuGFP_SBILIST GFP_ATOMIC
-#else
-AuStubVoid(au_sbilist_lock, void)
-AuStubVoid(au_sbilist_unlock, void)
-#define AuGFP_SBILIST GFP_NOFS
-#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
-#else
-AuStubVoid(au_sbilist_init, void)
-AuStubVoid(au_sbilist_add, struct super_block *sb)
-AuStubVoid(au_sbilist_del, struct super_block *sb)
-AuStubVoid(au_sbilist_lock, void)
-AuStubVoid(au_sbilist_unlock, void)
-#define AuGFP_SBILIST GFP_NOFS
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo)
-{
- /*
- * This function is a dynamic '__init' function actually,
- * so the tiny check for si_rwsem is unnecessary.
- */
- /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
-#ifdef CONFIG_DEBUG_FS
- sbinfo->si_dbgaufs = NULL;
- sbinfo->si_dbgaufs_plink = NULL;
- sbinfo->si_dbgaufs_xib = NULL;
-#ifdef CONFIG_AUFS_EXPORT
- sbinfo->si_dbgaufs_xigen = NULL;
-#endif
-#endif
-}
-
-/* ---------------------------------------------------------------------- */
-
-static inline pid_t si_pid_bit(void)
-{
- /* the origin of pid is 1, but the bitmap's is 0 */
- return current->pid - 1;
-}
-
-static inline int si_pid_test(struct super_block *sb)
-{
- pid_t bit;
-
- bit = si_pid_bit();
- if (bit < PID_MAX_DEFAULT)
- return test_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
- return si_pid_test_slow(sb);
-}
-
-static inline void si_pid_set(struct super_block *sb)
-{
- pid_t bit;
-
- bit = si_pid_bit();
- if (bit < PID_MAX_DEFAULT) {
- AuDebugOn(test_bit(bit, au_sbi(sb)->au_si_pid.bitmap));
- set_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
- /* smp_mb(); */
- } else
- si_pid_set_slow(sb);
-}
-
-static inline void si_pid_clr(struct super_block *sb)
-{
- pid_t bit;
-
- bit = si_pid_bit();
- if (bit < PID_MAX_DEFAULT) {
- AuDebugOn(!test_bit(bit, au_sbi(sb)->au_si_pid.bitmap));
- clear_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
- /* smp_mb(); */
- } else
- si_pid_clr_slow(sb);
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* lock superblock. mainly for entry point functions */
-/*
- * __si_read_lock, __si_write_lock,
- * __si_read_unlock, __si_write_unlock, __si_downgrade_lock
- */
-AuSimpleRwsemFuncs(__si, struct super_block *sb, &au_sbi(sb)->si_rwsem);
-
-#define SiMustNoWaiters(sb) AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem)
-#define SiMustAnyLock(sb) AuRwMustAnyLock(&au_sbi(sb)->si_rwsem)
-#define SiMustWriteLock(sb) AuRwMustWriteLock(&au_sbi(sb)->si_rwsem)
-
-static inline void si_noflush_read_lock(struct super_block *sb)
-{
- __si_read_lock(sb);
- si_pid_set(sb);
-}
-
-static inline int si_noflush_read_trylock(struct super_block *sb)
-{
- int locked;
-
- locked = __si_read_trylock(sb);
- if (locked)
- si_pid_set(sb);
- return locked;
-}
-
-static inline void si_noflush_write_lock(struct super_block *sb)
-{
- __si_write_lock(sb);
- si_pid_set(sb);
-}
-
-static inline int si_noflush_write_trylock(struct super_block *sb)
-{
- int locked;
-
- locked = __si_write_trylock(sb);
- if (locked)
- si_pid_set(sb);
- return locked;
-}
-
-#if 0 /* reserved */
-static inline int si_read_trylock(struct super_block *sb, int flags)
-{
- if (au_ftest_lock(flags, FLUSH))
- au_nwt_flush(&au_sbi(sb)->si_nowait);
- return si_noflush_read_trylock(sb);
-}
-#endif
-
-static inline void si_read_unlock(struct super_block *sb)
-{
- si_pid_clr(sb);
- __si_read_unlock(sb);
-}
-
-#if 0 /* reserved */
-static inline int si_write_trylock(struct super_block *sb, int flags)
-{
- if (au_ftest_lock(flags, FLUSH))
- au_nwt_flush(&au_sbi(sb)->si_nowait);
- return si_noflush_write_trylock(sb);
-}
-#endif
-
-static inline void si_write_unlock(struct super_block *sb)
-{
- si_pid_clr(sb);
- __si_write_unlock(sb);
-}
-
-#if 0 /* reserved */
-static inline void si_downgrade_lock(struct super_block *sb)
-{
- __si_downgrade_lock(sb);
-}
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-static inline aufs_bindex_t au_sbend(struct super_block *sb)
-{
- SiMustAnyLock(sb);
- return au_sbi(sb)->si_bend;
-}
-
-static inline unsigned int au_mntflags(struct super_block *sb)
-{
- SiMustAnyLock(sb);
- return au_sbi(sb)->si_mntflags;
-}
-
-static inline unsigned int au_sigen(struct super_block *sb)
-{
- SiMustAnyLock(sb);
- return au_sbi(sb)->si_generation;
-}
-
-static inline void au_ninodes_inc(struct super_block *sb)
-{
- atomic_long_inc(&au_sbi(sb)->si_ninodes);
-}
-
-static inline void au_ninodes_dec(struct super_block *sb)
-{
- AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_ninodes));
- atomic_long_dec(&au_sbi(sb)->si_ninodes);
-}
-
-static inline void au_nfiles_inc(struct super_block *sb)
-{
- atomic_long_inc(&au_sbi(sb)->si_nfiles);
-}
-
-static inline void au_nfiles_dec(struct super_block *sb)
-{
- AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_nfiles));
- atomic_long_dec(&au_sbi(sb)->si_nfiles);
-}
-
-static inline struct au_branch *au_sbr(struct super_block *sb,
- aufs_bindex_t bindex)
-{
- SiMustAnyLock(sb);
- return au_sbi(sb)->si_branch[0 + bindex];
-}
-
-static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid)
-{
- SiMustWriteLock(sb);
- au_sbi(sb)->si_xino_brid = brid;
-}
-
-static inline aufs_bindex_t au_xino_brid(struct super_block *sb)
-{
- SiMustAnyLock(sb);
- return au_sbi(sb)->si_xino_brid;
-}
-
-#endif /* __KERNEL__ */
-#endif /* __AUFS_SUPER_H__ */
diff --git a/fs/aufs/sysaufs.c b/fs/aufs/sysaufs.c
deleted file mode 100644
index 8ec10fb31..000000000
--- a/fs/aufs/sysaufs.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * sysfs interface and lifetime management
- * they are necessary regardless sysfs is disabled.
- */
-
-#include <linux/random.h>
-#include "aufs.h"
-
-unsigned long sysaufs_si_mask;
-struct kset *sysaufs_kset;
-
-#define AuSiAttr(_name) { \
- .attr = { .name = __stringify(_name), .mode = 0444 }, \
- .show = sysaufs_si_##_name, \
-}
-
-static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path);
-struct attribute *sysaufs_si_attrs[] = {
- &sysaufs_si_attr_xi_path.attr,
- NULL,
-};
-
-static const struct sysfs_ops au_sbi_ops = {
- .show = sysaufs_si_show
-};
-
-static struct kobj_type au_sbi_ktype = {
- .release = au_si_free,
- .sysfs_ops = &au_sbi_ops,
- .default_attrs = sysaufs_si_attrs
-};
-
-/* ---------------------------------------------------------------------- */
-
-int sysaufs_si_init(struct au_sbinfo *sbinfo)
-{
- int err;
-
- sbinfo->si_kobj.kset = sysaufs_kset;
- /* cf. sysaufs_name() */
- err = kobject_init_and_add
- (&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_kset->kobj*/NULL,
- SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo));
-
- dbgaufs_si_null(sbinfo);
- if (!err) {
- err = dbgaufs_si_init(sbinfo);
- if (unlikely(err))
- kobject_put(&sbinfo->si_kobj);
- }
- return err;
-}
-
-void sysaufs_fin(void)
-{
- dbgaufs_fin();
- sysfs_remove_group(&sysaufs_kset->kobj, sysaufs_attr_group);
- kset_unregister(sysaufs_kset);
-}
-
-int __init sysaufs_init(void)
-{
- int err;
-
- do {
- get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask));
- } while (!sysaufs_si_mask);
-
- err = -EINVAL;
- sysaufs_kset = kset_create_and_add(AUFS_NAME, NULL, fs_kobj);
- if (unlikely(!sysaufs_kset))
- goto out;
- err = PTR_ERR(sysaufs_kset);
- if (IS_ERR(sysaufs_kset))
- goto out;
- err = sysfs_create_group(&sysaufs_kset->kobj, sysaufs_attr_group);
- if (unlikely(err)) {
- kset_unregister(sysaufs_kset);
- goto out;
- }
-
- err = dbgaufs_init();
- if (unlikely(err))
- sysaufs_fin();
-out:
- return err;
-}
diff --git a/fs/aufs/sysaufs.h b/fs/aufs/sysaufs.h
deleted file mode 100644
index 1f799835e..000000000
--- a/fs/aufs/sysaufs.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * sysfs interface and mount lifetime management
- */
-
-#ifndef __SYSAUFS_H__
-#define __SYSAUFS_H__
-
-#ifdef __KERNEL__
-
-#include <linux/sysfs.h>
-#include "module.h"
-
-struct super_block;
-struct au_sbinfo;
-
-struct sysaufs_si_attr {
- struct attribute attr;
- int (*show)(struct seq_file *seq, struct super_block *sb);
-};
-
-/* ---------------------------------------------------------------------- */
-
-/* sysaufs.c */
-extern unsigned long sysaufs_si_mask;
-extern struct kset *sysaufs_kset;
-extern struct attribute *sysaufs_si_attrs[];
-int sysaufs_si_init(struct au_sbinfo *sbinfo);
-int __init sysaufs_init(void);
-void sysaufs_fin(void);
-
-/* ---------------------------------------------------------------------- */
-
-/* some people doesn't like to show a pointer in kernel */
-static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo)
-{
- return sysaufs_si_mask ^ (unsigned long)sbinfo;
-}
-
-#define SysaufsSiNamePrefix "si_"
-#define SysaufsSiNameLen (sizeof(SysaufsSiNamePrefix) + 16)
-static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name)
-{
- snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx",
- sysaufs_si_id(sbinfo));
-}
-
-struct au_branch;
-#ifdef CONFIG_SYSFS
-/* sysfs.c */
-extern struct attribute_group *sysaufs_attr_group;
-
-int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb);
-ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
- char *buf);
-long au_brinfo_ioctl(struct file *file, unsigned long arg);
-#ifdef CONFIG_COMPAT
-long au_brinfo_compat_ioctl(struct file *file, unsigned long arg);
-#endif
-
-void sysaufs_br_init(struct au_branch *br);
-void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
-void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
-
-#define sysaufs_brs_init() do {} while (0)
-
-#else
-#define sysaufs_attr_group NULL
-
-AuStubInt0(sysaufs_si_xi_path, struct seq_file *seq, struct super_block *sb)
-AuStub(ssize_t, sysaufs_si_show, return 0, struct kobject *kobj,
- struct attribute *attr, char *buf)
-AuStubVoid(sysaufs_br_init, struct au_branch *br)
-AuStubVoid(sysaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
-AuStubVoid(sysaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
-
-static inline void sysaufs_brs_init(void)
-{
- sysaufs_brs = 0;
-}
-
-#endif /* CONFIG_SYSFS */
-
-#endif /* __KERNEL__ */
-#endif /* __SYSAUFS_H__ */
diff --git a/fs/aufs/sysfs.c b/fs/aufs/sysfs.c
deleted file mode 100644
index ed42f53d0..000000000
--- a/fs/aufs/sysfs.c
+++ /dev/null
@@ -1,340 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * sysfs interface
- */
-
-#include <linux/compat.h>
-#include <linux/seq_file.h>
-#include "aufs.h"
-
-static struct attribute *au_attr[] = {
- NULL, /* need to NULL terminate the list of attributes */
-};
-
-static struct attribute_group sysaufs_attr_group_body = {
- .attrs = au_attr
-};
-
-struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body;
-
-/* ---------------------------------------------------------------------- */
-
-int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
-{
- int err;
-
- SiMustAnyLock(sb);
-
- err = 0;
- if (au_opt_test(au_mntflags(sb), XINO)) {
- err = au_xino_path(seq, au_sbi(sb)->si_xib);
- seq_putc(seq, '\n');
- }
- return err;
-}
-
-/*
- * the lifetime of branch is independent from the entry under sysfs.
- * sysfs handles the lifetime of the entry, and never call ->show() after it is
- * unlinked.
- */
-static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb,
- aufs_bindex_t bindex, int idx)
-{
- int err;
- struct path path;
- struct dentry *root;
- struct au_branch *br;
- au_br_perm_str_t perm;
-
- AuDbg("b%d\n", bindex);
-
- err = 0;
- root = sb->s_root;
- di_read_lock_parent(root, !AuLock_IR);
- br = au_sbr(sb, bindex);
-
- switch (idx) {
- case AuBrSysfs_BR:
- path.mnt = au_br_mnt(br);
- path.dentry = au_h_dptr(root, bindex);
- err = au_seq_path(seq, &path);
- if (!err) {
- au_optstr_br_perm(&perm, br->br_perm);
- seq_printf(seq, "=%s\n", perm.a);
- }
- break;
- case AuBrSysfs_BRID:
- seq_printf(seq, "%d\n", br->br_id);
- break;
- }
- di_read_unlock(root, !AuLock_IR);
- if (unlikely(err || seq_has_overflowed(seq)))
- err = -E2BIG;
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static struct seq_file *au_seq(char *p, ssize_t len)
-{
- struct seq_file *seq;
-
- seq = kzalloc(sizeof(*seq), GFP_NOFS);
- if (seq) {
- /* mutex_init(&seq.lock); */
- seq->buf = p;
- seq->size = len;
- return seq; /* success */
- }
-
- seq = ERR_PTR(-ENOMEM);
- return seq;
-}
-
-#define SysaufsBr_PREFIX "br"
-#define SysaufsBrid_PREFIX "brid"
-
-/* todo: file size may exceed PAGE_SIZE */
-ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- ssize_t err;
- int idx;
- long l;
- aufs_bindex_t bend;
- struct au_sbinfo *sbinfo;
- struct super_block *sb;
- struct seq_file *seq;
- char *name;
- struct attribute **cattr;
-
- sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
- sb = sbinfo->si_sb;
-
- /*
- * prevent a race condition between sysfs and aufs.
- * for instance, sysfs_file_read() calls sysfs_get_active_two() which
- * prohibits maintaining the sysfs entries.
- * hew we acquire read lock after sysfs_get_active_two().
- * on the other hand, the remount process may maintain the sysfs/aufs
- * entries after acquiring write lock.
- * it can cause a deadlock.
- * simply we gave up processing read here.
- */
- err = -EBUSY;
- if (unlikely(!si_noflush_read_trylock(sb)))
- goto out;
-
- seq = au_seq(buf, PAGE_SIZE);
- err = PTR_ERR(seq);
- if (IS_ERR(seq))
- goto out_unlock;
-
- name = (void *)attr->name;
- cattr = sysaufs_si_attrs;
- while (*cattr) {
- if (!strcmp(name, (*cattr)->name)) {
- err = container_of(*cattr, struct sysaufs_si_attr, attr)
- ->show(seq, sb);
- goto out_seq;
- }
- cattr++;
- }
-
- if (!strncmp(name, SysaufsBrid_PREFIX,
- sizeof(SysaufsBrid_PREFIX) - 1)) {
- idx = AuBrSysfs_BRID;
- name += sizeof(SysaufsBrid_PREFIX) - 1;
- } else if (!strncmp(name, SysaufsBr_PREFIX,
- sizeof(SysaufsBr_PREFIX) - 1)) {
- idx = AuBrSysfs_BR;
- name += sizeof(SysaufsBr_PREFIX) - 1;
- } else
- BUG();
-
- err = kstrtol(name, 10, &l);
- if (!err) {
- bend = au_sbend(sb);
- if (l <= bend)
- err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l, idx);
- else
- err = -ENOENT;
- }
-
-out_seq:
- if (!err) {
- err = seq->count;
- /* sysfs limit */
- if (unlikely(err == PAGE_SIZE))
- err = -EFBIG;
- }
- kfree(seq);
-out_unlock:
- si_read_unlock(sb);
-out:
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int au_brinfo(struct super_block *sb, union aufs_brinfo __user *arg)
-{
- int err;
- int16_t brid;
- aufs_bindex_t bindex, bend;
- size_t sz;
- char *buf;
- struct seq_file *seq;
- struct au_branch *br;
-
- si_read_lock(sb, AuLock_FLUSH);
- bend = au_sbend(sb);
- err = bend + 1;
- if (!arg)
- goto out;
-
- err = -ENOMEM;
- buf = (void *)__get_free_page(GFP_NOFS);
- if (unlikely(!buf))
- goto out;
-
- seq = au_seq(buf, PAGE_SIZE);
- err = PTR_ERR(seq);
- if (IS_ERR(seq))
- goto out_buf;
-
- sz = sizeof(*arg) - offsetof(union aufs_brinfo, path);
- for (bindex = 0; bindex <= bend; bindex++, arg++) {
- err = !access_ok(VERIFY_WRITE, arg, sizeof(*arg));
- if (unlikely(err))
- break;
-
- br = au_sbr(sb, bindex);
- brid = br->br_id;
- BUILD_BUG_ON(sizeof(brid) != sizeof(arg->id));
- err = __put_user(brid, &arg->id);
- if (unlikely(err))
- break;
-
- BUILD_BUG_ON(sizeof(br->br_perm) != sizeof(arg->perm));
- err = __put_user(br->br_perm, &arg->perm);
- if (unlikely(err))
- break;
-
- err = au_seq_path(seq, &br->br_path);
- if (unlikely(err))
- break;
- seq_putc(seq, '\0');
- if (!seq_has_overflowed(seq)) {
- err = copy_to_user(arg->path, seq->buf, seq->count);
- seq->count = 0;
- if (unlikely(err))
- break;
- } else {
- err = -E2BIG;
- goto out_seq;
- }
- }
- if (unlikely(err))
- err = -EFAULT;
-
-out_seq:
- kfree(seq);
-out_buf:
- free_page((unsigned long)buf);
-out:
- si_read_unlock(sb);
- return err;
-}
-
-long au_brinfo_ioctl(struct file *file, unsigned long arg)
-{
- return au_brinfo(file->f_path.dentry->d_sb, (void __user *)arg);
-}
-
-#ifdef CONFIG_COMPAT
-long au_brinfo_compat_ioctl(struct file *file, unsigned long arg)
-{
- return au_brinfo(file->f_path.dentry->d_sb, compat_ptr(arg));
-}
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-void sysaufs_br_init(struct au_branch *br)
-{
- int i;
- struct au_brsysfs *br_sysfs;
- struct attribute *attr;
-
- br_sysfs = br->br_sysfs;
- for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
- attr = &br_sysfs->attr;
- sysfs_attr_init(attr);
- attr->name = br_sysfs->name;
- attr->mode = S_IRUGO;
- br_sysfs++;
- }
-}
-
-void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
-{
- struct au_branch *br;
- struct kobject *kobj;
- struct au_brsysfs *br_sysfs;
- int i;
- aufs_bindex_t bend;
-
- dbgaufs_brs_del(sb, bindex);
-
- if (!sysaufs_brs)
- return;
-
- kobj = &au_sbi(sb)->si_kobj;
- bend = au_sbend(sb);
- for (; bindex <= bend; bindex++) {
- br = au_sbr(sb, bindex);
- br_sysfs = br->br_sysfs;
- for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
- sysfs_remove_file(kobj, &br_sysfs->attr);
- br_sysfs++;
- }
- }
-}
-
-void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
-{
- int err, i;
- aufs_bindex_t bend;
- struct kobject *kobj;
- struct au_branch *br;
- struct au_brsysfs *br_sysfs;
-
- dbgaufs_brs_add(sb, bindex);
-
- if (!sysaufs_brs)
- return;
-
- kobj = &au_sbi(sb)->si_kobj;
- bend = au_sbend(sb);
- for (; bindex <= bend; bindex++) {
- br = au_sbr(sb, bindex);
- br_sysfs = br->br_sysfs;
- snprintf(br_sysfs[AuBrSysfs_BR].name, sizeof(br_sysfs->name),
- SysaufsBr_PREFIX "%d", bindex);
- snprintf(br_sysfs[AuBrSysfs_BRID].name, sizeof(br_sysfs->name),
- SysaufsBrid_PREFIX "%d", bindex);
- for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
- err = sysfs_create_file(kobj, &br_sysfs->attr);
- if (unlikely(err))
- pr_warn("failed %s under sysfs(%d)\n",
- br_sysfs->name, err);
- br_sysfs++;
- }
- }
-}
diff --git a/fs/aufs/sysrq.c b/fs/aufs/sysrq.c
deleted file mode 100644
index 7921ed716..000000000
--- a/fs/aufs/sysrq.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * magic sysrq hanlder
- */
-
-/* #include <linux/sysrq.h> */
-#include <linux/writeback.h>
-#include "aufs.h"
-
-/* ---------------------------------------------------------------------- */
-
-static void sysrq_sb(struct super_block *sb)
-{
- char *plevel;
- struct au_sbinfo *sbinfo;
- struct file *file;
- struct au_sphlhead *files;
- struct au_finfo *finfo;
-
- plevel = au_plevel;
- au_plevel = KERN_WARNING;
-
- /* since we define pr_fmt, call printk directly */
-#define pr(str) printk(KERN_WARNING AUFS_NAME ": " str)
-
- sbinfo = au_sbi(sb);
- printk(KERN_WARNING "si=%lx\n", sysaufs_si_id(sbinfo));
- pr("superblock\n");
- au_dpri_sb(sb);
-
-#if 0
- pr("root dentry\n");
- au_dpri_dentry(sb->s_root);
- pr("root inode\n");
- au_dpri_inode(d_inode(sb->s_root));
-#endif
-
-#if 0
- do {
- int err, i, j, ndentry;
- struct au_dcsub_pages dpages;
- struct au_dpage *dpage;
-
- err = au_dpages_init(&dpages, GFP_ATOMIC);
- if (unlikely(err))
- break;
- err = au_dcsub_pages(&dpages, sb->s_root, NULL, NULL);
- if (!err)
- for (i = 0; i < dpages.ndpage; i++) {
- dpage = dpages.dpages + i;
- ndentry = dpage->ndentry;
- for (j = 0; j < ndentry; j++)
- au_dpri_dentry(dpage->dentries[j]);
- }
- au_dpages_free(&dpages);
- } while (0);
-#endif
-
-#if 1
- {
- struct inode *i;
-
- pr("isolated inode\n");
- spin_lock(&sb->s_inode_list_lock);
- list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
- spin_lock(&i->i_lock);
- if (1 || hlist_empty(&i->i_dentry))
- au_dpri_inode(i);
- spin_unlock(&i->i_lock);
- }
- spin_unlock(&sb->s_inode_list_lock);
- }
-#endif
- pr("files\n");
- files = &au_sbi(sb)->si_files;
- spin_lock(&files->spin);
- hlist_for_each_entry(finfo, &files->head, fi_hlist) {
- umode_t mode;
-
- file = finfo->fi_file;
- mode = file_inode(file)->i_mode;
- if (!special_file(mode))
- au_dpri_file(file);
- }
- spin_unlock(&files->spin);
- pr("done\n");
-
-#undef pr
- au_plevel = plevel;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* module parameter */
-static char *aufs_sysrq_key = "a";
-module_param_named(sysrq, aufs_sysrq_key, charp, S_IRUGO);
-MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME);
-
-static void au_sysrq(int key __maybe_unused)
-{
- struct au_sbinfo *sbinfo;
-
- lockdep_off();
- au_sbilist_lock();
- list_for_each_entry(sbinfo, &au_sbilist.head, si_list)
- sysrq_sb(sbinfo->si_sb);
- au_sbilist_unlock();
- lockdep_on();
-}
-
-static struct sysrq_key_op au_sysrq_op = {
- .handler = au_sysrq,
- .help_msg = "Aufs",
- .action_msg = "Aufs",
- .enable_mask = SYSRQ_ENABLE_DUMP
-};
-
-/* ---------------------------------------------------------------------- */
-
-int __init au_sysrq_init(void)
-{
- int err;
- char key;
-
- err = -1;
- key = *aufs_sysrq_key;
- if ('a' <= key && key <= 'z')
- err = register_sysrq_key(key, &au_sysrq_op);
- if (unlikely(err))
- pr_err("err %d, sysrq=%c\n", err, key);
- return err;
-}
-
-void au_sysrq_fin(void)
-{
- int err;
-
- err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op);
- if (unlikely(err))
- pr_err("err %d (ignored)\n", err);
-}
diff --git a/fs/aufs/vdir.c b/fs/aufs/vdir.c
deleted file mode 100644
index f64cc2b7a..000000000
--- a/fs/aufs/vdir.c
+++ /dev/null
@@ -1,875 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * virtual or vertical directory
- */
-
-#include "aufs.h"
-
-static unsigned int calc_size(int nlen)
-{
- return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t));
-}
-
-static int set_deblk_end(union au_vdir_deblk_p *p,
- union au_vdir_deblk_p *deblk_end)
-{
- if (calc_size(0) <= deblk_end->deblk - p->deblk) {
- p->de->de_str.len = 0;
- /* smp_mb(); */
- return 0;
- }
- return -1; /* error */
-}
-
-/* returns true or false */
-static int is_deblk_end(union au_vdir_deblk_p *p,
- union au_vdir_deblk_p *deblk_end)
-{
- if (calc_size(0) <= deblk_end->deblk - p->deblk)
- return !p->de->de_str.len;
- return 1;
-}
-
-static unsigned char *last_deblk(struct au_vdir *vdir)
-{
- return vdir->vd_deblk[vdir->vd_nblk - 1];
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* estimate the appropriate size for name hash table */
-unsigned int au_rdhash_est(loff_t sz)
-{
- unsigned int n;
-
- n = UINT_MAX;
- sz >>= 10;
- if (sz < n)
- n = sz;
- if (sz < AUFS_RDHASH_DEF)
- n = AUFS_RDHASH_DEF;
- /* pr_info("n %u\n", n); */
- return n;
-}
-
-/*
- * the allocated memory has to be freed by
- * au_nhash_wh_free() or au_nhash_de_free().
- */
-int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp)
-{
- struct hlist_head *head;
- unsigned int u;
- size_t sz;
-
- sz = sizeof(*nhash->nh_head) * num_hash;
- head = kmalloc(sz, gfp);
- if (head) {
- nhash->nh_num = num_hash;
- nhash->nh_head = head;
- for (u = 0; u < num_hash; u++)
- INIT_HLIST_HEAD(head++);
- return 0; /* success */
- }
-
- return -ENOMEM;
-}
-
-static void nhash_count(struct hlist_head *head)
-{
-#if 0
- unsigned long n;
- struct hlist_node *pos;
-
- n = 0;
- hlist_for_each(pos, head)
- n++;
- pr_info("%lu\n", n);
-#endif
-}
-
-static void au_nhash_wh_do_free(struct hlist_head *head)
-{
- struct au_vdir_wh *pos;
- struct hlist_node *node;
-
- hlist_for_each_entry_safe(pos, node, head, wh_hash)
- kfree(pos);
-}
-
-static void au_nhash_de_do_free(struct hlist_head *head)
-{
- struct au_vdir_dehstr *pos;
- struct hlist_node *node;
-
- hlist_for_each_entry_safe(pos, node, head, hash)
- au_cache_free_vdir_dehstr(pos);
-}
-
-static void au_nhash_do_free(struct au_nhash *nhash,
- void (*free)(struct hlist_head *head))
-{
- unsigned int n;
- struct hlist_head *head;
-
- n = nhash->nh_num;
- if (!n)
- return;
-
- head = nhash->nh_head;
- while (n-- > 0) {
- nhash_count(head);
- free(head++);
- }
- kfree(nhash->nh_head);
-}
-
-void au_nhash_wh_free(struct au_nhash *whlist)
-{
- au_nhash_do_free(whlist, au_nhash_wh_do_free);
-}
-
-static void au_nhash_de_free(struct au_nhash *delist)
-{
- au_nhash_do_free(delist, au_nhash_de_do_free);
-}
-
-/* ---------------------------------------------------------------------- */
-
-int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
- int limit)
-{
- int num;
- unsigned int u, n;
- struct hlist_head *head;
- struct au_vdir_wh *pos;
-
- num = 0;
- n = whlist->nh_num;
- head = whlist->nh_head;
- for (u = 0; u < n; u++, head++)
- hlist_for_each_entry(pos, head, wh_hash)
- if (pos->wh_bindex == btgt && ++num > limit)
- return 1;
- return 0;
-}
-
-static struct hlist_head *au_name_hash(struct au_nhash *nhash,
- unsigned char *name,
- unsigned int len)
-{
- unsigned int v;
- /* const unsigned int magic_bit = 12; */
-
- AuDebugOn(!nhash->nh_num || !nhash->nh_head);
-
- v = 0;
- while (len--)
- v += *name++;
- /* v = hash_long(v, magic_bit); */
- v %= nhash->nh_num;
- return nhash->nh_head + v;
-}
-
-static int au_nhash_test_name(struct au_vdir_destr *str, const char *name,
- int nlen)
-{
- return str->len == nlen && !memcmp(str->name, name, nlen);
-}
-
-/* returns found or not */
-int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen)
-{
- struct hlist_head *head;
- struct au_vdir_wh *pos;
- struct au_vdir_destr *str;
-
- head = au_name_hash(whlist, name, nlen);
- hlist_for_each_entry(pos, head, wh_hash) {
- str = &pos->wh_str;
- AuDbg("%.*s\n", str->len, str->name);
- if (au_nhash_test_name(str, name, nlen))
- return 1;
- }
- return 0;
-}
-
-/* returns found(true) or not */
-static int test_known(struct au_nhash *delist, char *name, int nlen)
-{
- struct hlist_head *head;
- struct au_vdir_dehstr *pos;
- struct au_vdir_destr *str;
-
- head = au_name_hash(delist, name, nlen);
- hlist_for_each_entry(pos, head, hash) {
- str = pos->str;
- AuDbg("%.*s\n", str->len, str->name);
- if (au_nhash_test_name(str, name, nlen))
- return 1;
- }
- return 0;
-}
-
-static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino,
- unsigned char d_type)
-{
-#ifdef CONFIG_AUFS_SHWH
- wh->wh_ino = ino;
- wh->wh_type = d_type;
-#endif
-}
-
-/* ---------------------------------------------------------------------- */
-
-int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
- unsigned int d_type, aufs_bindex_t bindex,
- unsigned char shwh)
-{
- int err;
- struct au_vdir_destr *str;
- struct au_vdir_wh *wh;
-
- AuDbg("%.*s\n", nlen, name);
- AuDebugOn(!whlist->nh_num || !whlist->nh_head);
-
- err = -ENOMEM;
- wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS);
- if (unlikely(!wh))
- goto out;
-
- err = 0;
- wh->wh_bindex = bindex;
- if (shwh)
- au_shwh_init_wh(wh, ino, d_type);
- str = &wh->wh_str;
- str->len = nlen;
- memcpy(str->name, name, nlen);
- hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen));
- /* smp_mb(); */
-
-out:
- return err;
-}
-
-static int append_deblk(struct au_vdir *vdir)
-{
- int err;
- unsigned long ul;
- const unsigned int deblk_sz = vdir->vd_deblk_sz;
- union au_vdir_deblk_p p, deblk_end;
- unsigned char **o;
-
- err = -ENOMEM;
- o = krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1),
- GFP_NOFS);
- if (unlikely(!o))
- goto out;
-
- vdir->vd_deblk = o;
- p.deblk = kmalloc(deblk_sz, GFP_NOFS);
- if (p.deblk) {
- ul = vdir->vd_nblk++;
- vdir->vd_deblk[ul] = p.deblk;
- vdir->vd_last.ul = ul;
- vdir->vd_last.p.deblk = p.deblk;
- deblk_end.deblk = p.deblk + deblk_sz;
- err = set_deblk_end(&p, &deblk_end);
- }
-
-out:
- return err;
-}
-
-static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino,
- unsigned int d_type, struct au_nhash *delist)
-{
- int err;
- unsigned int sz;
- const unsigned int deblk_sz = vdir->vd_deblk_sz;
- union au_vdir_deblk_p p, *room, deblk_end;
- struct au_vdir_dehstr *dehstr;
-
- p.deblk = last_deblk(vdir);
- deblk_end.deblk = p.deblk + deblk_sz;
- room = &vdir->vd_last.p;
- AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk
- || !is_deblk_end(room, &deblk_end));
-
- sz = calc_size(nlen);
- if (unlikely(sz > deblk_end.deblk - room->deblk)) {
- err = append_deblk(vdir);
- if (unlikely(err))
- goto out;
-
- p.deblk = last_deblk(vdir);
- deblk_end.deblk = p.deblk + deblk_sz;
- /* smp_mb(); */
- AuDebugOn(room->deblk != p.deblk);
- }
-
- err = -ENOMEM;
- dehstr = au_cache_alloc_vdir_dehstr();
- if (unlikely(!dehstr))
- goto out;
-
- dehstr->str = &room->de->de_str;
- hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen));
- room->de->de_ino = ino;
- room->de->de_type = d_type;
- room->de->de_str.len = nlen;
- memcpy(room->de->de_str.name, name, nlen);
-
- err = 0;
- room->deblk += sz;
- if (unlikely(set_deblk_end(room, &deblk_end)))
- err = append_deblk(vdir);
- /* smp_mb(); */
-
-out:
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void au_vdir_free(struct au_vdir *vdir)
-{
- unsigned char **deblk;
-
- deblk = vdir->vd_deblk;
- while (vdir->vd_nblk--)
- kfree(*deblk++);
- kfree(vdir->vd_deblk);
- au_cache_free_vdir(vdir);
-}
-
-static struct au_vdir *alloc_vdir(struct file *file)
-{
- struct au_vdir *vdir;
- struct super_block *sb;
- int err;
-
- sb = file->f_path.dentry->d_sb;
- SiMustAnyLock(sb);
-
- err = -ENOMEM;
- vdir = au_cache_alloc_vdir();
- if (unlikely(!vdir))
- goto out;
-
- vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS);
- if (unlikely(!vdir->vd_deblk))
- goto out_free;
-
- vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk;
- if (!vdir->vd_deblk_sz) {
- /* estimate the appropriate size for deblk */
- vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL);
- /* pr_info("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */
- }
- vdir->vd_nblk = 0;
- vdir->vd_version = 0;
- vdir->vd_jiffy = 0;
- err = append_deblk(vdir);
- if (!err)
- return vdir; /* success */
-
- kfree(vdir->vd_deblk);
-
-out_free:
- au_cache_free_vdir(vdir);
-out:
- vdir = ERR_PTR(err);
- return vdir;
-}
-
-static int reinit_vdir(struct au_vdir *vdir)
-{
- int err;
- union au_vdir_deblk_p p, deblk_end;
-
- while (vdir->vd_nblk > 1) {
- kfree(vdir->vd_deblk[vdir->vd_nblk - 1]);
- /* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */
- vdir->vd_nblk--;
- }
- p.deblk = vdir->vd_deblk[0];
- deblk_end.deblk = p.deblk + vdir->vd_deblk_sz;
- err = set_deblk_end(&p, &deblk_end);
- /* keep vd_dblk_sz */
- vdir->vd_last.ul = 0;
- vdir->vd_last.p.deblk = vdir->vd_deblk[0];
- vdir->vd_version = 0;
- vdir->vd_jiffy = 0;
- /* smp_mb(); */
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-#define AuFillVdir_CALLED 1
-#define AuFillVdir_WHABLE (1 << 1)
-#define AuFillVdir_SHWH (1 << 2)
-#define au_ftest_fillvdir(flags, name) ((flags) & AuFillVdir_##name)
-#define au_fset_fillvdir(flags, name) \
- do { (flags) |= AuFillVdir_##name; } while (0)
-#define au_fclr_fillvdir(flags, name) \
- do { (flags) &= ~AuFillVdir_##name; } while (0)
-
-#ifndef CONFIG_AUFS_SHWH
-#undef AuFillVdir_SHWH
-#define AuFillVdir_SHWH 0
-#endif
-
-struct fillvdir_arg {
- struct dir_context ctx;
- struct file *file;
- struct au_vdir *vdir;
- struct au_nhash delist;
- struct au_nhash whlist;
- aufs_bindex_t bindex;
- unsigned int flags;
- int err;
-};
-
-static int fillvdir(struct dir_context *ctx, const char *__name, int nlen,
- loff_t offset __maybe_unused, u64 h_ino,
- unsigned int d_type)
-{
- struct fillvdir_arg *arg = container_of(ctx, struct fillvdir_arg, ctx);
- char *name = (void *)__name;
- struct super_block *sb;
- ino_t ino;
- const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH);
-
- arg->err = 0;
- sb = arg->file->f_path.dentry->d_sb;
- au_fset_fillvdir(arg->flags, CALLED);
- /* smp_mb(); */
- if (nlen <= AUFS_WH_PFX_LEN
- || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
- if (test_known(&arg->delist, name, nlen)
- || au_nhash_test_known_wh(&arg->whlist, name, nlen))
- goto out; /* already exists or whiteouted */
-
- arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino);
- if (!arg->err) {
- if (unlikely(nlen > AUFS_MAX_NAMELEN))
- d_type = DT_UNKNOWN;
- arg->err = append_de(arg->vdir, name, nlen, ino,
- d_type, &arg->delist);
- }
- } else if (au_ftest_fillvdir(arg->flags, WHABLE)) {
- name += AUFS_WH_PFX_LEN;
- nlen -= AUFS_WH_PFX_LEN;
- if (au_nhash_test_known_wh(&arg->whlist, name, nlen))
- goto out; /* already whiteouted */
-
- if (shwh)
- arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type,
- &ino);
- if (!arg->err) {
- if (nlen <= AUFS_MAX_NAMELEN + AUFS_WH_PFX_LEN)
- d_type = DT_UNKNOWN;
- arg->err = au_nhash_append_wh
- (&arg->whlist, name, nlen, ino, d_type,
- arg->bindex, shwh);
- }
- }
-
-out:
- if (!arg->err)
- arg->vdir->vd_jiffy = jiffies;
- /* smp_mb(); */
- AuTraceErr(arg->err);
- return arg->err;
-}
-
-static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir,
- struct au_nhash *whlist, struct au_nhash *delist)
-{
-#ifdef CONFIG_AUFS_SHWH
- int err;
- unsigned int nh, u;
- struct hlist_head *head;
- struct au_vdir_wh *pos;
- struct hlist_node *n;
- char *p, *o;
- struct au_vdir_destr *destr;
-
- AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH));
-
- err = -ENOMEM;
- o = p = (void *)__get_free_page(GFP_NOFS);
- if (unlikely(!p))
- goto out;
-
- err = 0;
- nh = whlist->nh_num;
- memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
- p += AUFS_WH_PFX_LEN;
- for (u = 0; u < nh; u++) {
- head = whlist->nh_head + u;
- hlist_for_each_entry_safe(pos, n, head, wh_hash) {
- destr = &pos->wh_str;
- memcpy(p, destr->name, destr->len);
- err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN,
- pos->wh_ino, pos->wh_type, delist);
- if (unlikely(err))
- break;
- }
- }
-
- free_page((unsigned long)o);
-
-out:
- AuTraceErr(err);
- return err;
-#else
- return 0;
-#endif
-}
-
-static int au_do_read_vdir(struct fillvdir_arg *arg)
-{
- int err;
- unsigned int rdhash;
- loff_t offset;
- aufs_bindex_t bend, bindex, bstart;
- unsigned char shwh;
- struct file *hf, *file;
- struct super_block *sb;
-
- file = arg->file;
- sb = file->f_path.dentry->d_sb;
- SiMustAnyLock(sb);
-
- rdhash = au_sbi(sb)->si_rdhash;
- if (!rdhash)
- rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL));
- err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS);
- if (unlikely(err))
- goto out;
- err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS);
- if (unlikely(err))
- goto out_delist;
-
- err = 0;
- arg->flags = 0;
- shwh = 0;
- if (au_opt_test(au_mntflags(sb), SHWH)) {
- shwh = 1;
- au_fset_fillvdir(arg->flags, SHWH);
- }
- bstart = au_fbstart(file);
- bend = au_fbend_dir(file);
- for (bindex = bstart; !err && bindex <= bend; bindex++) {
- hf = au_hf_dir(file, bindex);
- if (!hf)
- continue;
-
- offset = vfsub_llseek(hf, 0, SEEK_SET);
- err = offset;
- if (unlikely(offset))
- break;
-
- arg->bindex = bindex;
- au_fclr_fillvdir(arg->flags, WHABLE);
- if (shwh
- || (bindex != bend
- && au_br_whable(au_sbr_perm(sb, bindex))))
- au_fset_fillvdir(arg->flags, WHABLE);
- do {
- arg->err = 0;
- au_fclr_fillvdir(arg->flags, CALLED);
- /* smp_mb(); */
- err = vfsub_iterate_dir(hf, &arg->ctx);
- if (err >= 0)
- err = arg->err;
- } while (!err && au_ftest_fillvdir(arg->flags, CALLED));
-
- /*
- * dir_relax() may be good for concurrency, but aufs should not
- * use it since it will cause a lockdep problem.
- */
- }
-
- if (!err && shwh)
- err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist);
-
- au_nhash_wh_free(&arg->whlist);
-
-out_delist:
- au_nhash_de_free(&arg->delist);
-out:
- return err;
-}
-
-static int read_vdir(struct file *file, int may_read)
-{
- int err;
- unsigned long expire;
- unsigned char do_read;
- struct fillvdir_arg arg = {
- .ctx = {
- .actor = fillvdir
- }
- };
- struct inode *inode;
- struct au_vdir *vdir, *allocated;
-
- err = 0;
- inode = file_inode(file);
- IMustLock(inode);
- SiMustAnyLock(inode->i_sb);
-
- allocated = NULL;
- do_read = 0;
- expire = au_sbi(inode->i_sb)->si_rdcache;
- vdir = au_ivdir(inode);
- if (!vdir) {
- do_read = 1;
- vdir = alloc_vdir(file);
- err = PTR_ERR(vdir);
- if (IS_ERR(vdir))
- goto out;
- err = 0;
- allocated = vdir;
- } else if (may_read
- && (inode->i_version != vdir->vd_version
- || time_after(jiffies, vdir->vd_jiffy + expire))) {
- do_read = 1;
- err = reinit_vdir(vdir);
- if (unlikely(err))
- goto out;
- }
-
- if (!do_read)
- return 0; /* success */
-
- arg.file = file;
- arg.vdir = vdir;
- err = au_do_read_vdir(&arg);
- if (!err) {
- /* file->f_pos = 0; */ /* todo: ctx->pos? */
- vdir->vd_version = inode->i_version;
- vdir->vd_last.ul = 0;
- vdir->vd_last.p.deblk = vdir->vd_deblk[0];
- if (allocated)
- au_set_ivdir(inode, allocated);
- } else if (allocated)
- au_vdir_free(allocated);
-
-out:
- return err;
-}
-
-static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src)
-{
- int err, rerr;
- unsigned long ul, n;
- const unsigned int deblk_sz = src->vd_deblk_sz;
-
- AuDebugOn(tgt->vd_nblk != 1);
-
- err = -ENOMEM;
- if (tgt->vd_nblk < src->vd_nblk) {
- unsigned char **p;
-
- p = krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk,
- GFP_NOFS);
- if (unlikely(!p))
- goto out;
- tgt->vd_deblk = p;
- }
-
- if (tgt->vd_deblk_sz != deblk_sz) {
- unsigned char *p;
-
- tgt->vd_deblk_sz = deblk_sz;
- p = krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS);
- if (unlikely(!p))
- goto out;
- tgt->vd_deblk[0] = p;
- }
- memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz);
- tgt->vd_version = src->vd_version;
- tgt->vd_jiffy = src->vd_jiffy;
-
- n = src->vd_nblk;
- for (ul = 1; ul < n; ul++) {
- tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz,
- GFP_NOFS);
- if (unlikely(!tgt->vd_deblk[ul]))
- goto out;
- tgt->vd_nblk++;
- }
- tgt->vd_nblk = n;
- tgt->vd_last.ul = tgt->vd_last.ul;
- tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul];
- tgt->vd_last.p.deblk += src->vd_last.p.deblk
- - src->vd_deblk[src->vd_last.ul];
- /* smp_mb(); */
- return 0; /* success */
-
-out:
- rerr = reinit_vdir(tgt);
- BUG_ON(rerr);
- return err;
-}
-
-int au_vdir_init(struct file *file)
-{
- int err;
- struct inode *inode;
- struct au_vdir *vdir_cache, *allocated;
-
- /* test file->f_pos here instead of ctx->pos */
- err = read_vdir(file, !file->f_pos);
- if (unlikely(err))
- goto out;
-
- allocated = NULL;
- vdir_cache = au_fvdir_cache(file);
- if (!vdir_cache) {
- vdir_cache = alloc_vdir(file);
- err = PTR_ERR(vdir_cache);
- if (IS_ERR(vdir_cache))
- goto out;
- allocated = vdir_cache;
- } else if (!file->f_pos && vdir_cache->vd_version != file->f_version) {
- /* test file->f_pos here instead of ctx->pos */
- err = reinit_vdir(vdir_cache);
- if (unlikely(err))
- goto out;
- } else
- return 0; /* success */
-
- inode = file_inode(file);
- err = copy_vdir(vdir_cache, au_ivdir(inode));
- if (!err) {
- file->f_version = inode->i_version;
- if (allocated)
- au_set_fvdir_cache(file, allocated);
- } else if (allocated)
- au_vdir_free(allocated);
-
-out:
- return err;
-}
-
-static loff_t calc_offset(struct au_vdir *vdir)
-{
- loff_t offset;
- union au_vdir_deblk_p p;
-
- p.deblk = vdir->vd_deblk[vdir->vd_last.ul];
- offset = vdir->vd_last.p.deblk - p.deblk;
- offset += vdir->vd_deblk_sz * vdir->vd_last.ul;
- return offset;
-}
-
-/* returns true or false */
-static int seek_vdir(struct file *file, struct dir_context *ctx)
-{
- int valid;
- unsigned int deblk_sz;
- unsigned long ul, n;
- loff_t offset;
- union au_vdir_deblk_p p, deblk_end;
- struct au_vdir *vdir_cache;
-
- valid = 1;
- vdir_cache = au_fvdir_cache(file);
- offset = calc_offset(vdir_cache);
- AuDbg("offset %lld\n", offset);
- if (ctx->pos == offset)
- goto out;
-
- vdir_cache->vd_last.ul = 0;
- vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0];
- if (!ctx->pos)
- goto out;
-
- valid = 0;
- deblk_sz = vdir_cache->vd_deblk_sz;
- ul = div64_u64(ctx->pos, deblk_sz);
- AuDbg("ul %lu\n", ul);
- if (ul >= vdir_cache->vd_nblk)
- goto out;
-
- n = vdir_cache->vd_nblk;
- for (; ul < n; ul++) {
- p.deblk = vdir_cache->vd_deblk[ul];
- deblk_end.deblk = p.deblk + deblk_sz;
- offset = ul;
- offset *= deblk_sz;
- while (!is_deblk_end(&p, &deblk_end) && offset < ctx->pos) {
- unsigned int l;
-
- l = calc_size(p.de->de_str.len);
- offset += l;
- p.deblk += l;
- }
- if (!is_deblk_end(&p, &deblk_end)) {
- valid = 1;
- vdir_cache->vd_last.ul = ul;
- vdir_cache->vd_last.p = p;
- break;
- }
- }
-
-out:
- /* smp_mb(); */
- AuTraceErr(!valid);
- return valid;
-}
-
-int au_vdir_fill_de(struct file *file, struct dir_context *ctx)
-{
- unsigned int l, deblk_sz;
- union au_vdir_deblk_p deblk_end;
- struct au_vdir *vdir_cache;
- struct au_vdir_de *de;
-
- vdir_cache = au_fvdir_cache(file);
- if (!seek_vdir(file, ctx))
- return 0;
-
- deblk_sz = vdir_cache->vd_deblk_sz;
- while (1) {
- deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
- deblk_end.deblk += deblk_sz;
- while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) {
- de = vdir_cache->vd_last.p.de;
- AuDbg("%.*s, off%lld, i%lu, dt%d\n",
- de->de_str.len, de->de_str.name, ctx->pos,
- (unsigned long)de->de_ino, de->de_type);
- if (unlikely(!dir_emit(ctx, de->de_str.name,
- de->de_str.len, de->de_ino,
- de->de_type))) {
- /* todo: ignore the error caused by udba? */
- /* return err; */
- return 0;
- }
-
- l = calc_size(de->de_str.len);
- vdir_cache->vd_last.p.deblk += l;
- ctx->pos += l;
- }
- if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) {
- vdir_cache->vd_last.ul++;
- vdir_cache->vd_last.p.deblk
- = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
- ctx->pos = deblk_sz * vdir_cache->vd_last.ul;
- continue;
- }
- break;
- }
-
- /* smp_mb(); */
- return 0;
-}
diff --git a/fs/aufs/vfsub.c b/fs/aufs/vfsub.c
deleted file mode 100644
index 89f999c97..000000000
--- a/fs/aufs/vfsub.c
+++ /dev/null
@@ -1,853 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * sub-routines for VFS
- */
-
-#include <linux/namei.h>
-#include <linux/nsproxy.h>
-#include <linux/security.h>
-#include <linux/splice.h>
-#include "../fs/mount.h"
-#include "aufs.h"
-
-#ifdef CONFIG_AUFS_BR_FUSE
-int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb)
-{
- struct nsproxy *ns;
-
- if (!au_test_fuse(h_sb) || !au_userns)
- return 0;
-
- ns = current->nsproxy;
- /* no {get,put}_nsproxy(ns) */
- return real_mount(mnt)->mnt_ns == ns->mnt_ns ? 0 : -EACCES;
-}
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-int vfsub_update_h_iattr(struct path *h_path, int *did)
-{
- int err;
- struct kstat st;
- struct super_block *h_sb;
-
- /* for remote fs, leave work for its getattr or d_revalidate */
- /* for bad i_attr fs, handle them in aufs_getattr() */
- /* still some fs may acquire i_mutex. we need to skip them */
- err = 0;
- if (!did)
- did = &err;
- h_sb = h_path->dentry->d_sb;
- *did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb));
- if (*did)
- err = vfs_getattr(h_path, &st);
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-struct file *vfsub_dentry_open(struct path *path, int flags)
-{
- struct file *file;
-
- file = dentry_open(path, flags /* | __FMODE_NONOTIFY */,
- current_cred());
- if (!IS_ERR_OR_NULL(file)
- && (file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
- i_readcount_inc(d_inode(path->dentry));
-
- return file;
-}
-
-struct file *vfsub_filp_open(const char *path, int oflags, int mode)
-{
- struct file *file;
-
- lockdep_off();
- file = filp_open(path,
- oflags /* | __FMODE_NONOTIFY */,
- mode);
- lockdep_on();
- if (IS_ERR(file))
- goto out;
- vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
-
-out:
- return file;
-}
-
-/*
- * Ideally this function should call VFS:do_last() in order to keep all its
- * checkings. But it is very hard for aufs to regenerate several VFS internal
- * structure such as nameidata. This is a second (or third) best approach.
- * cf. linux/fs/namei.c:do_last(), lookup_open() and atomic_open().
- */
-int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
- struct vfsub_aopen_args *args, struct au_branch *br)
-{
- int err;
- struct file *file = args->file;
- /* copied from linux/fs/namei.c:atomic_open() */
- struct dentry *const DENTRY_NOT_SET = (void *)-1UL;
-
- IMustLock(dir);
- AuDebugOn(!dir->i_op->atomic_open);
-
- err = au_br_test_oflag(args->open_flag, br);
- if (unlikely(err))
- goto out;
-
- args->file->f_path.dentry = DENTRY_NOT_SET;
- args->file->f_path.mnt = au_br_mnt(br);
- err = dir->i_op->atomic_open(dir, dentry, file, args->open_flag,
- args->create_mode, args->opened);
- if (err >= 0) {
- /* some filesystems don't set FILE_CREATED while succeeded? */
- if (*args->opened & FILE_CREATED)
- fsnotify_create(dir, dentry);
- } else
- goto out;
-
-
- if (!err) {
- /* todo: call VFS:may_open() here */
- err = open_check_o_direct(file);
- /* todo: ima_file_check() too? */
- if (!err && (args->open_flag & __FMODE_EXEC))
- err = deny_write_access(file);
- if (unlikely(err))
- /* note that the file is created and still opened */
- goto out;
- }
-
- atomic_inc(&br->br_count);
- fsnotify_open(file);
-
-out:
- return err;
-}
-
-int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
-{
- int err;
-
- err = kern_path(name, flags, path);
- if (!err && d_is_positive(path->dentry))
- vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
- return err;
-}
-
-struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
- int len)
-{
- struct path path = {
- .mnt = NULL
- };
-
- /* VFS checks it too, but by WARN_ON_ONCE() */
- IMustLock(d_inode(parent));
-
- path.dentry = lookup_one_len(name, parent, len);
- if (IS_ERR(path.dentry))
- goto out;
- if (d_is_positive(path.dentry))
- vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
-
-out:
- AuTraceErrPtr(path.dentry);
- return path.dentry;
-}
-
-void vfsub_call_lkup_one(void *args)
-{
- struct vfsub_lkup_one_args *a = args;
- *a->errp = vfsub_lkup_one(a->name, a->parent);
-}
-
-/* ---------------------------------------------------------------------- */
-
-struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
- struct dentry *d2, struct au_hinode *hdir2)
-{
- struct dentry *d;
-
- lockdep_off();
- d = lock_rename(d1, d2);
- lockdep_on();
- au_hn_suspend(hdir1);
- if (hdir1 != hdir2)
- au_hn_suspend(hdir2);
-
- return d;
-}
-
-void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
- struct dentry *d2, struct au_hinode *hdir2)
-{
- au_hn_resume(hdir1);
- if (hdir1 != hdir2)
- au_hn_resume(hdir2);
- lockdep_off();
- unlock_rename(d1, d2);
- lockdep_on();
-}
-
-/* ---------------------------------------------------------------------- */
-
-int vfsub_create(struct inode *dir, struct path *path, int mode, bool want_excl)
-{
- int err;
- struct dentry *d;
-
- IMustLock(dir);
-
- d = path->dentry;
- path->dentry = d->d_parent;
- err = security_path_mknod(path, d, mode, 0);
- path->dentry = d;
- if (unlikely(err))
- goto out;
-
- lockdep_off();
- err = vfs_create(dir, path->dentry, mode, want_excl);
- lockdep_on();
- if (!err) {
- struct path tmp = *path;
- int did;
-
- vfsub_update_h_iattr(&tmp, &did);
- if (did) {
- tmp.dentry = path->dentry->d_parent;
- vfsub_update_h_iattr(&tmp, /*did*/NULL);
- }
- /*ignore*/
- }
-
-out:
- return err;
-}
-
-int vfsub_symlink(struct inode *dir, struct path *path, const char *symname)
-{
- int err;
- struct dentry *d;
-
- IMustLock(dir);
-
- d = path->dentry;
- path->dentry = d->d_parent;
- err = security_path_symlink(path, d, symname);
- path->dentry = d;
- if (unlikely(err))
- goto out;
-
- lockdep_off();
- err = vfs_symlink(dir, path->dentry, symname);
- lockdep_on();
- if (!err) {
- struct path tmp = *path;
- int did;
-
- vfsub_update_h_iattr(&tmp, &did);
- if (did) {
- tmp.dentry = path->dentry->d_parent;
- vfsub_update_h_iattr(&tmp, /*did*/NULL);
- }
- /*ignore*/
- }
-
-out:
- return err;
-}
-
-int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev)
-{
- int err;
- struct dentry *d;
-
- IMustLock(dir);
-
- d = path->dentry;
- path->dentry = d->d_parent;
- err = security_path_mknod(path, d, mode, new_encode_dev(dev));
- path->dentry = d;
- if (unlikely(err))
- goto out;
-
- lockdep_off();
- err = vfs_mknod(dir, path->dentry, mode, dev);
- lockdep_on();
- if (!err) {
- struct path tmp = *path;
- int did;
-
- vfsub_update_h_iattr(&tmp, &did);
- if (did) {
- tmp.dentry = path->dentry->d_parent;
- vfsub_update_h_iattr(&tmp, /*did*/NULL);
- }
- /*ignore*/
- }
-
-out:
- return err;
-}
-
-static int au_test_nlink(struct inode *inode)
-{
- const unsigned int link_max = UINT_MAX >> 1; /* rough margin */
-
- if (!au_test_fs_no_limit_nlink(inode->i_sb)
- || inode->i_nlink < link_max)
- return 0;
- return -EMLINK;
-}
-
-int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path,
- struct inode **delegated_inode)
-{
- int err;
- struct dentry *d;
-
- IMustLock(dir);
-
- err = au_test_nlink(d_inode(src_dentry));
- if (unlikely(err))
- return err;
-
- /* we don't call may_linkat() */
- d = path->dentry;
- path->dentry = d->d_parent;
- err = security_path_link(src_dentry, path, d);
- path->dentry = d;
- if (unlikely(err))
- goto out;
-
- lockdep_off();
- err = vfs_link(src_dentry, dir, path->dentry, delegated_inode);
- lockdep_on();
- if (!err) {
- struct path tmp = *path;
- int did;
-
- /* fuse has different memory inode for the same inumber */
- vfsub_update_h_iattr(&tmp, &did);
- if (did) {
- tmp.dentry = path->dentry->d_parent;
- vfsub_update_h_iattr(&tmp, /*did*/NULL);
- tmp.dentry = src_dentry;
- vfsub_update_h_iattr(&tmp, /*did*/NULL);
- }
- /*ignore*/
- }
-
-out:
- return err;
-}
-
-int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry,
- struct inode *dir, struct path *path,
- struct inode **delegated_inode)
-{
- int err;
- struct path tmp = {
- .mnt = path->mnt
- };
- struct dentry *d;
-
- IMustLock(dir);
- IMustLock(src_dir);
-
- d = path->dentry;
- path->dentry = d->d_parent;
- tmp.dentry = src_dentry->d_parent;
- err = security_path_rename(&tmp, src_dentry, path, d, /*flags*/0);
- path->dentry = d;
- if (unlikely(err))
- goto out;
-
- lockdep_off();
- err = vfs_rename(src_dir, src_dentry, dir, path->dentry,
- delegated_inode, /*flags*/0);
- lockdep_on();
- if (!err) {
- int did;
-
- tmp.dentry = d->d_parent;
- vfsub_update_h_iattr(&tmp, &did);
- if (did) {
- tmp.dentry = src_dentry;
- vfsub_update_h_iattr(&tmp, /*did*/NULL);
- tmp.dentry = src_dentry->d_parent;
- vfsub_update_h_iattr(&tmp, /*did*/NULL);
- }
- /*ignore*/
- }
-
-out:
- return err;
-}
-
-int vfsub_mkdir(struct inode *dir, struct path *path, int mode)
-{
- int err;
- struct dentry *d;
-
- IMustLock(dir);
-
- d = path->dentry;
- path->dentry = d->d_parent;
- err = security_path_mkdir(path, d, mode);
- path->dentry = d;
- if (unlikely(err))
- goto out;
-
- lockdep_off();
- err = vfs_mkdir(dir, path->dentry, mode);
- lockdep_on();
- if (!err) {
- struct path tmp = *path;
- int did;
-
- vfsub_update_h_iattr(&tmp, &did);
- if (did) {
- tmp.dentry = path->dentry->d_parent;
- vfsub_update_h_iattr(&tmp, /*did*/NULL);
- }
- /*ignore*/
- }
-
-out:
- return err;
-}
-
-int vfsub_rmdir(struct inode *dir, struct path *path)
-{
- int err;
- struct dentry *d;
-
- IMustLock(dir);
-
- d = path->dentry;
- path->dentry = d->d_parent;
- err = security_path_rmdir(path, d);
- path->dentry = d;
- if (unlikely(err))
- goto out;
-
- lockdep_off();
- err = vfs_rmdir(dir, path->dentry);
- lockdep_on();
- if (!err) {
- struct path tmp = {
- .dentry = path->dentry->d_parent,
- .mnt = path->mnt
- };
-
- vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
- }
-
-out:
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* todo: support mmap_sem? */
-ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
- loff_t *ppos)
-{
- ssize_t err;
-
- lockdep_off();
- err = vfs_read(file, ubuf, count, ppos);
- lockdep_on();
- if (err >= 0)
- vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
- return err;
-}
-
-/* todo: kernel_read()? */
-ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
- loff_t *ppos)
-{
- ssize_t err;
- mm_segment_t oldfs;
- union {
- void *k;
- char __user *u;
- } buf;
-
- buf.k = kbuf;
- oldfs = get_fs();
- set_fs(KERNEL_DS);
- err = vfsub_read_u(file, buf.u, count, ppos);
- set_fs(oldfs);
- return err;
-}
-
-ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
- loff_t *ppos)
-{
- ssize_t err;
-
- lockdep_off();
- err = vfs_write(file, ubuf, count, ppos);
- lockdep_on();
- if (err >= 0)
- vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
- return err;
-}
-
-ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos)
-{
- ssize_t err;
- mm_segment_t oldfs;
- union {
- void *k;
- const char __user *u;
- } buf;
-
- buf.k = kbuf;
- oldfs = get_fs();
- set_fs(KERNEL_DS);
- err = vfsub_write_u(file, buf.u, count, ppos);
- set_fs(oldfs);
- return err;
-}
-
-int vfsub_flush(struct file *file, fl_owner_t id)
-{
- int err;
-
- err = 0;
- if (file->f_op->flush) {
- if (!au_test_nfs(file->f_path.dentry->d_sb))
- err = file->f_op->flush(file, id);
- else {
- lockdep_off();
- err = file->f_op->flush(file, id);
- lockdep_on();
- }
- if (!err)
- vfsub_update_h_iattr(&file->f_path, /*did*/NULL);
- /*ignore*/
- }
- return err;
-}
-
-int vfsub_iterate_dir(struct file *file, struct dir_context *ctx)
-{
- int err;
-
- AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);
-
- lockdep_off();
- err = iterate_dir(file, ctx);
- lockdep_on();
- if (err >= 0)
- vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
- return err;
-}
-
-long vfsub_splice_to(struct file *in, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t len,
- unsigned int flags)
-{
- long err;
-
- lockdep_off();
- err = do_splice_to(in, ppos, pipe, len, flags);
- lockdep_on();
- file_accessed(in);
- if (err >= 0)
- vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
- return err;
-}
-
-long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
- loff_t *ppos, size_t len, unsigned int flags)
-{
- long err;
-
- lockdep_off();
- err = do_splice_from(pipe, out, ppos, len, flags);
- lockdep_on();
- if (err >= 0)
- vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
- return err;
-}
-
-int vfsub_fsync(struct file *file, struct path *path, int datasync)
-{
- int err;
-
- /* file can be NULL */
- lockdep_off();
- err = vfs_fsync(file, datasync);
- lockdep_on();
- if (!err) {
- if (!path) {
- AuDebugOn(!file);
- path = &file->f_path;
- }
- vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
- }
- return err;
-}
-
-/* cf. open.c:do_sys_truncate() and do_sys_ftruncate() */
-int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
- struct file *h_file)
-{
- int err;
- struct inode *h_inode;
- struct super_block *h_sb;
-
- if (!h_file) {
- err = vfsub_truncate(h_path, length);
- goto out;
- }
-
- h_inode = d_inode(h_path->dentry);
- h_sb = h_inode->i_sb;
- lockdep_off();
- sb_start_write(h_sb);
- lockdep_on();
- err = locks_verify_truncate(h_inode, h_file, length);
- if (!err)
- err = security_path_truncate(h_path);
- if (!err) {
- lockdep_off();
- err = do_truncate(h_path->dentry, length, attr, h_file);
- lockdep_on();
- }
- lockdep_off();
- sb_end_write(h_sb);
- lockdep_on();
-
-out:
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-struct au_vfsub_mkdir_args {
- int *errp;
- struct inode *dir;
- struct path *path;
- int mode;
-};
-
-static void au_call_vfsub_mkdir(void *args)
-{
- struct au_vfsub_mkdir_args *a = args;
- *a->errp = vfsub_mkdir(a->dir, a->path, a->mode);
-}
-
-int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode)
-{
- int err, do_sio, wkq_err;
-
- do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
- if (!do_sio) {
- lockdep_off();
- err = vfsub_mkdir(dir, path, mode);
- lockdep_on();
- } else {
- struct au_vfsub_mkdir_args args = {
- .errp = &err,
- .dir = dir,
- .path = path,
- .mode = mode
- };
- wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args);
- if (unlikely(wkq_err))
- err = wkq_err;
- }
-
- return err;
-}
-
-struct au_vfsub_rmdir_args {
- int *errp;
- struct inode *dir;
- struct path *path;
-};
-
-static void au_call_vfsub_rmdir(void *args)
-{
- struct au_vfsub_rmdir_args *a = args;
- *a->errp = vfsub_rmdir(a->dir, a->path);
-}
-
-int vfsub_sio_rmdir(struct inode *dir, struct path *path)
-{
- int err, do_sio, wkq_err;
-
- do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
- if (!do_sio) {
- lockdep_off();
- err = vfsub_rmdir(dir, path);
- lockdep_on();
- } else {
- struct au_vfsub_rmdir_args args = {
- .errp = &err,
- .dir = dir,
- .path = path
- };
- wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args);
- if (unlikely(wkq_err))
- err = wkq_err;
- }
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-struct notify_change_args {
- int *errp;
- struct path *path;
- struct iattr *ia;
- struct inode **delegated_inode;
-};
-
-static void call_notify_change(void *args)
-{
- struct notify_change_args *a = args;
- struct inode *h_inode;
-
- h_inode = d_inode(a->path->dentry);
- IMustLock(h_inode);
-
- *a->errp = -EPERM;
- if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) {
- lockdep_off();
- *a->errp = notify_change(a->path->dentry, a->ia,
- a->delegated_inode);
- lockdep_on();
- if (!*a->errp)
- vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/
- }
- AuTraceErr(*a->errp);
-}
-
-int vfsub_notify_change(struct path *path, struct iattr *ia,
- struct inode **delegated_inode)
-{
- int err;
- struct notify_change_args args = {
- .errp = &err,
- .path = path,
- .ia = ia,
- .delegated_inode = delegated_inode
- };
-
- call_notify_change(&args);
-
- return err;
-}
-
-int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
- struct inode **delegated_inode)
-{
- int err, wkq_err;
- struct notify_change_args args = {
- .errp = &err,
- .path = path,
- .ia = ia,
- .delegated_inode = delegated_inode
- };
-
- wkq_err = au_wkq_wait(call_notify_change, &args);
- if (unlikely(wkq_err))
- err = wkq_err;
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-struct unlink_args {
- int *errp;
- struct inode *dir;
- struct path *path;
- struct inode **delegated_inode;
-};
-
-static void call_unlink(void *args)
-{
- struct unlink_args *a = args;
- struct dentry *d = a->path->dentry;
- struct inode *h_inode;
- const int stop_sillyrename = (au_test_nfs(d->d_sb)
- && au_dcount(d) == 1);
-
- IMustLock(a->dir);
-
- a->path->dentry = d->d_parent;
- *a->errp = security_path_unlink(a->path, d);
- a->path->dentry = d;
- if (unlikely(*a->errp))
- return;
-
- if (!stop_sillyrename)
- dget(d);
- h_inode = NULL;
- if (d_is_positive(d)) {
- h_inode = d_inode(d);
- ihold(h_inode);
- }
-
- lockdep_off();
- *a->errp = vfs_unlink(a->dir, d, a->delegated_inode);
- lockdep_on();
- if (!*a->errp) {
- struct path tmp = {
- .dentry = d->d_parent,
- .mnt = a->path->mnt
- };
- vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
- }
-
- if (!stop_sillyrename)
- dput(d);
- if (h_inode)
- iput(h_inode);
-
- AuTraceErr(*a->errp);
-}
-
-/*
- * @dir: must be locked.
- * @dentry: target dentry.
- */
-int vfsub_unlink(struct inode *dir, struct path *path,
- struct inode **delegated_inode, int force)
-{
- int err;
- struct unlink_args args = {
- .errp = &err,
- .dir = dir,
- .path = path,
- .delegated_inode = delegated_inode
- };
-
- if (!force)
- call_unlink(&args);
- else {
- int wkq_err;
-
- wkq_err = au_wkq_wait(call_unlink, &args);
- if (unlikely(wkq_err))
- err = wkq_err;
- }
-
- return err;
-}
diff --git a/fs/aufs/vfsub.h b/fs/aufs/vfsub.h
deleted file mode 100644
index f2e1c49af..000000000
--- a/fs/aufs/vfsub.h
+++ /dev/null
@@ -1,295 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * sub-routines for VFS
- */
-
-#ifndef __AUFS_VFSUB_H__
-#define __AUFS_VFSUB_H__
-
-#ifdef __KERNEL__
-
-#include <linux/fs.h>
-#include <linux/mount.h>
-#include <linux/posix_acl.h>
-#include <linux/xattr.h>
-#include "debug.h"
-
-/* copied from linux/fs/internal.h */
-/* todo: BAD approach!! */
-extern void __mnt_drop_write(struct vfsmount *);
-extern int open_check_o_direct(struct file *f);
-
-/* ---------------------------------------------------------------------- */
-
-/* lock subclass for lower inode */
-/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */
-/* reduce? gave up. */
-enum {
- AuLsc_I_Begin = I_MUTEX_PARENT2, /* 5 */
- AuLsc_I_PARENT, /* lower inode, parent first */
- AuLsc_I_PARENT2, /* copyup dirs */
- AuLsc_I_PARENT3, /* copyup wh */
- AuLsc_I_CHILD,
- AuLsc_I_CHILD2,
- AuLsc_I_End
-};
-
-/* to debug easier, do not make them inlined functions */
-#define MtxMustLock(mtx) AuDebugOn(!mutex_is_locked(mtx))
-#define IMustLock(i) MtxMustLock(&(i)->i_mutex)
-
-/* ---------------------------------------------------------------------- */
-
-static inline void vfsub_drop_nlink(struct inode *inode)
-{
- AuDebugOn(!inode->i_nlink);
- drop_nlink(inode);
-}
-
-static inline void vfsub_dead_dir(struct inode *inode)
-{
- AuDebugOn(!S_ISDIR(inode->i_mode));
- inode->i_flags |= S_DEAD;
- clear_nlink(inode);
-}
-
-static inline int vfsub_native_ro(struct inode *inode)
-{
- return (inode->i_sb->s_flags & MS_RDONLY)
- || IS_RDONLY(inode)
- /* || IS_APPEND(inode) */
- || IS_IMMUTABLE(inode);
-}
-
-#ifdef CONFIG_AUFS_BR_FUSE
-int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb);
-#else
-AuStubInt0(vfsub_test_mntns, struct vfsmount *mnt, struct super_block *h_sb);
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-int vfsub_update_h_iattr(struct path *h_path, int *did);
-struct file *vfsub_dentry_open(struct path *path, int flags);
-struct file *vfsub_filp_open(const char *path, int oflags, int mode);
-struct vfsub_aopen_args {
- struct file *file;
- unsigned int open_flag;
- umode_t create_mode;
- int *opened;
-};
-struct au_branch;
-int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
- struct vfsub_aopen_args *args, struct au_branch *br);
-int vfsub_kern_path(const char *name, unsigned int flags, struct path *path);
-
-struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
- int len);
-
-struct vfsub_lkup_one_args {
- struct dentry **errp;
- struct qstr *name;
- struct dentry *parent;
-};
-
-static inline struct dentry *vfsub_lkup_one(struct qstr *name,
- struct dentry *parent)
-{
- return vfsub_lookup_one_len(name->name, parent, name->len);
-}
-
-void vfsub_call_lkup_one(void *args);
-
-/* ---------------------------------------------------------------------- */
-
-static inline int vfsub_mnt_want_write(struct vfsmount *mnt)
-{
- int err;
-
- lockdep_off();
- err = mnt_want_write(mnt);
- lockdep_on();
- return err;
-}
-
-static inline void vfsub_mnt_drop_write(struct vfsmount *mnt)
-{
- lockdep_off();
- mnt_drop_write(mnt);
- lockdep_on();
-}
-
-#if 0 /* reserved */
-static inline void vfsub_mnt_drop_write_file(struct file *file)
-{
- lockdep_off();
- mnt_drop_write_file(file);
- lockdep_on();
-}
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-struct au_hinode;
-struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
- struct dentry *d2, struct au_hinode *hdir2);
-void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
- struct dentry *d2, struct au_hinode *hdir2);
-
-int vfsub_create(struct inode *dir, struct path *path, int mode,
- bool want_excl);
-int vfsub_symlink(struct inode *dir, struct path *path,
- const char *symname);
-int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev);
-int vfsub_link(struct dentry *src_dentry, struct inode *dir,
- struct path *path, struct inode **delegated_inode);
-int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry,
- struct inode *hdir, struct path *path,
- struct inode **delegated_inode);
-int vfsub_mkdir(struct inode *dir, struct path *path, int mode);
-int vfsub_rmdir(struct inode *dir, struct path *path);
-
-/* ---------------------------------------------------------------------- */
-
-ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
- loff_t *ppos);
-ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
- loff_t *ppos);
-ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
- loff_t *ppos);
-ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count,
- loff_t *ppos);
-int vfsub_flush(struct file *file, fl_owner_t id);
-int vfsub_iterate_dir(struct file *file, struct dir_context *ctx);
-
-static inline loff_t vfsub_f_size_read(struct file *file)
-{
- return i_size_read(file_inode(file));
-}
-
-static inline unsigned int vfsub_file_flags(struct file *file)
-{
- unsigned int flags;
-
- spin_lock(&file->f_lock);
- flags = file->f_flags;
- spin_unlock(&file->f_lock);
-
- return flags;
-}
-
-#if 0 /* reserved */
-static inline void vfsub_file_accessed(struct file *h_file)
-{
- file_accessed(h_file);
- vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/
-}
-#endif
-
-#if 0 /* reserved */
-static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
- struct dentry *h_dentry)
-{
- struct path h_path = {
- .dentry = h_dentry,
- .mnt = h_mnt
- };
- touch_atime(&h_path);
- vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
-}
-#endif
-
-static inline int vfsub_update_time(struct inode *h_inode, struct timespec *ts,
- int flags)
-{
- return generic_update_time(h_inode, ts, flags);
- /* no vfsub_update_h_iattr() since we don't have struct path */
-}
-
-#ifdef CONFIG_FS_POSIX_ACL
-static inline int vfsub_acl_chmod(struct inode *h_inode, umode_t h_mode)
-{
- int err;
-
- err = posix_acl_chmod(h_inode, h_mode);
- if (err == -EOPNOTSUPP)
- err = 0;
- return err;
-}
-#else
-AuStubInt0(vfsub_acl_chmod, struct inode *h_inode, umode_t h_mode);
-#endif
-
-long vfsub_splice_to(struct file *in, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t len,
- unsigned int flags);
-long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
- loff_t *ppos, size_t len, unsigned int flags);
-
-static inline long vfsub_truncate(struct path *path, loff_t length)
-{
- long err;
-
- lockdep_off();
- err = vfs_truncate(path, length);
- lockdep_on();
- return err;
-}
-
-int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
- struct file *h_file);
-int vfsub_fsync(struct file *file, struct path *path, int datasync);
-
-/* ---------------------------------------------------------------------- */
-
-static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin)
-{
- loff_t err;
-
- lockdep_off();
- err = vfs_llseek(file, offset, origin);
- lockdep_on();
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode);
-int vfsub_sio_rmdir(struct inode *dir, struct path *path);
-int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
- struct inode **delegated_inode);
-int vfsub_notify_change(struct path *path, struct iattr *ia,
- struct inode **delegated_inode);
-int vfsub_unlink(struct inode *dir, struct path *path,
- struct inode **delegated_inode, int force);
-
-/* ---------------------------------------------------------------------- */
-
-static inline int vfsub_setxattr(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags)
-{
- int err;
-
- lockdep_off();
- err = vfs_setxattr(dentry, name, value, size, flags);
- lockdep_on();
-
- return err;
-}
-
-static inline int vfsub_removexattr(struct dentry *dentry, const char *name)
-{
- int err;
-
- lockdep_off();
- err = vfs_removexattr(dentry, name);
- lockdep_on();
-
- return err;
-}
-
-#endif /* __KERNEL__ */
-#endif /* __AUFS_VFSUB_H__ */
diff --git a/fs/aufs/wbr_policy.c b/fs/aufs/wbr_policy.c
deleted file mode 100644
index c822b428d..000000000
--- a/fs/aufs/wbr_policy.c
+++ /dev/null
@@ -1,752 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * policies for selecting one among multiple writable branches
- */
-
-#include <linux/statfs.h>
-#include "aufs.h"
-
-/* subset of cpup_attr() */
-static noinline_for_stack
-int au_cpdown_attr(struct path *h_path, struct dentry *h_src)
-{
- int err, sbits;
- struct iattr ia;
- struct inode *h_isrc;
-
- h_isrc = d_inode(h_src);
- ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID;
- ia.ia_mode = h_isrc->i_mode;
- ia.ia_uid = h_isrc->i_uid;
- ia.ia_gid = h_isrc->i_gid;
- sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID));
- au_cpup_attr_flags(d_inode(h_path->dentry), h_isrc->i_flags);
- /* no delegation since it is just created */
- err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
-
- /* is this nfs only? */
- if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) {
- ia.ia_valid = ATTR_FORCE | ATTR_MODE;
- ia.ia_mode = h_isrc->i_mode;
- err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
- }
-
- return err;
-}
-
-#define AuCpdown_PARENT_OPQ 1
-#define AuCpdown_WHED (1 << 1)
-#define AuCpdown_MADE_DIR (1 << 2)
-#define AuCpdown_DIROPQ (1 << 3)
-#define au_ftest_cpdown(flags, name) ((flags) & AuCpdown_##name)
-#define au_fset_cpdown(flags, name) \
- do { (flags) |= AuCpdown_##name; } while (0)
-#define au_fclr_cpdown(flags, name) \
- do { (flags) &= ~AuCpdown_##name; } while (0)
-
-static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst,
- unsigned int *flags)
-{
- int err;
- struct dentry *opq_dentry;
-
- opq_dentry = au_diropq_create(dentry, bdst);
- err = PTR_ERR(opq_dentry);
- if (IS_ERR(opq_dentry))
- goto out;
- dput(opq_dentry);
- au_fset_cpdown(*flags, DIROPQ);
-
-out:
- return err;
-}
-
-static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent,
- struct inode *dir, aufs_bindex_t bdst)
-{
- int err;
- struct path h_path;
- struct au_branch *br;
-
- br = au_sbr(dentry->d_sb, bdst);
- h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
- err = PTR_ERR(h_path.dentry);
- if (IS_ERR(h_path.dentry))
- goto out;
-
- err = 0;
- if (d_is_positive(h_path.dentry)) {
- h_path.mnt = au_br_mnt(br);
- err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path,
- dentry);
- }
- dput(h_path.dentry);
-
-out:
- return err;
-}
-
-static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
- struct au_pin *pin,
- struct dentry *h_parent, void *arg)
-{
- int err, rerr;
- aufs_bindex_t bopq, bstart;
- struct path h_path;
- struct dentry *parent;
- struct inode *h_dir, *h_inode, *inode, *dir;
- unsigned int *flags = arg;
-
- bstart = au_dbstart(dentry);
- /* dentry is di-locked */
- parent = dget_parent(dentry);
- dir = d_inode(parent);
- h_dir = d_inode(h_parent);
- AuDebugOn(h_dir != au_h_iptr(dir, bdst));
- IMustLock(h_dir);
-
- err = au_lkup_neg(dentry, bdst, /*wh*/0);
- if (unlikely(err < 0))
- goto out;
- h_path.dentry = au_h_dptr(dentry, bdst);
- h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst);
- err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path,
- S_IRWXU | S_IRUGO | S_IXUGO);
- if (unlikely(err))
- goto out_put;
- au_fset_cpdown(*flags, MADE_DIR);
-
- bopq = au_dbdiropq(dentry);
- au_fclr_cpdown(*flags, WHED);
- au_fclr_cpdown(*flags, DIROPQ);
- if (au_dbwh(dentry) == bdst)
- au_fset_cpdown(*flags, WHED);
- if (!au_ftest_cpdown(*flags, PARENT_OPQ) && bopq <= bdst)
- au_fset_cpdown(*flags, PARENT_OPQ);
- h_inode = d_inode(h_path.dentry);
- mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
- if (au_ftest_cpdown(*flags, WHED)) {
- err = au_cpdown_dir_opq(dentry, bdst, flags);
- if (unlikely(err)) {
- mutex_unlock(&h_inode->i_mutex);
- goto out_dir;
- }
- }
-
- err = au_cpdown_attr(&h_path, au_h_dptr(dentry, bstart));
- mutex_unlock(&h_inode->i_mutex);
- if (unlikely(err))
- goto out_opq;
-
- if (au_ftest_cpdown(*flags, WHED)) {
- err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst);
- if (unlikely(err))
- goto out_opq;
- }
-
- inode = d_inode(dentry);
- if (au_ibend(inode) < bdst)
- au_set_ibend(inode, bdst);
- au_set_h_iptr(inode, bdst, au_igrab(h_inode),
- au_hi_flags(inode, /*isdir*/1));
- au_fhsm_wrote(dentry->d_sb, bdst, /*force*/0);
- goto out; /* success */
-
- /* revert */
-out_opq:
- if (au_ftest_cpdown(*flags, DIROPQ)) {
- mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
- rerr = au_diropq_remove(dentry, bdst);
- mutex_unlock(&h_inode->i_mutex);
- if (unlikely(rerr)) {
- AuIOErr("failed removing diropq for %pd b%d (%d)\n",
- dentry, bdst, rerr);
- err = -EIO;
- goto out;
- }
- }
-out_dir:
- if (au_ftest_cpdown(*flags, MADE_DIR)) {
- rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path);
- if (unlikely(rerr)) {
- AuIOErr("failed removing %pd b%d (%d)\n",
- dentry, bdst, rerr);
- err = -EIO;
- }
- }
-out_put:
- au_set_h_dptr(dentry, bdst, NULL);
- if (au_dbend(dentry) == bdst)
- au_update_dbend(dentry);
-out:
- dput(parent);
- return err;
-}
-
-int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst)
-{
- int err;
- unsigned int flags;
-
- flags = 0;
- err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &flags);
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* policies for create */
-
-int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex)
-{
- int err, i, j, ndentry;
- aufs_bindex_t bopq;
- struct au_dcsub_pages dpages;
- struct au_dpage *dpage;
- struct dentry **dentries, *parent, *d;
-
- err = au_dpages_init(&dpages, GFP_NOFS);
- if (unlikely(err))
- goto out;
- parent = dget_parent(dentry);
- err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0);
- if (unlikely(err))
- goto out_free;
-
- err = bindex;
- for (i = 0; i < dpages.ndpage; i++) {
- dpage = dpages.dpages + i;
- dentries = dpage->dentries;
- ndentry = dpage->ndentry;
- for (j = 0; j < ndentry; j++) {
- d = dentries[j];
- di_read_lock_parent2(d, !AuLock_IR);
- bopq = au_dbdiropq(d);
- di_read_unlock(d, !AuLock_IR);
- if (bopq >= 0 && bopq < err)
- err = bopq;
- }
- }
-
-out_free:
- dput(parent);
- au_dpages_free(&dpages);
-out:
- return err;
-}
-
-static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex)
-{
- for (; bindex >= 0; bindex--)
- if (!au_br_rdonly(au_sbr(sb, bindex)))
- return bindex;
- return -EROFS;
-}
-
-/* top down parent */
-static int au_wbr_create_tdp(struct dentry *dentry,
- unsigned int flags __maybe_unused)
-{
- int err;
- aufs_bindex_t bstart, bindex;
- struct super_block *sb;
- struct dentry *parent, *h_parent;
-
- sb = dentry->d_sb;
- bstart = au_dbstart(dentry);
- err = bstart;
- if (!au_br_rdonly(au_sbr(sb, bstart)))
- goto out;
-
- err = -EROFS;
- parent = dget_parent(dentry);
- for (bindex = au_dbstart(parent); bindex < bstart; bindex++) {
- h_parent = au_h_dptr(parent, bindex);
- if (!h_parent || d_is_negative(h_parent))
- continue;
-
- if (!au_br_rdonly(au_sbr(sb, bindex))) {
- err = bindex;
- break;
- }
- }
- dput(parent);
-
- /* bottom up here */
- if (unlikely(err < 0)) {
- err = au_wbr_bu(sb, bstart - 1);
- if (err >= 0)
- err = au_wbr_nonopq(dentry, err);
- }
-
-out:
- AuDbg("b%d\n", err);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* an exception for the policy other than tdp */
-static int au_wbr_create_exp(struct dentry *dentry)
-{
- int err;
- aufs_bindex_t bwh, bdiropq;
- struct dentry *parent;
-
- err = -1;
- bwh = au_dbwh(dentry);
- parent = dget_parent(dentry);
- bdiropq = au_dbdiropq(parent);
- if (bwh >= 0) {
- if (bdiropq >= 0)
- err = min(bdiropq, bwh);
- else
- err = bwh;
- AuDbg("%d\n", err);
- } else if (bdiropq >= 0) {
- err = bdiropq;
- AuDbg("%d\n", err);
- }
- dput(parent);
-
- if (err >= 0)
- err = au_wbr_nonopq(dentry, err);
-
- if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err)))
- err = -1;
-
- AuDbg("%d\n", err);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* round robin */
-static int au_wbr_create_init_rr(struct super_block *sb)
-{
- int err;
-
- err = au_wbr_bu(sb, au_sbend(sb));
- atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */
- /* smp_mb(); */
-
- AuDbg("b%d\n", err);
- return err;
-}
-
-static int au_wbr_create_rr(struct dentry *dentry, unsigned int flags)
-{
- int err, nbr;
- unsigned int u;
- aufs_bindex_t bindex, bend;
- struct super_block *sb;
- atomic_t *next;
-
- err = au_wbr_create_exp(dentry);
- if (err >= 0)
- goto out;
-
- sb = dentry->d_sb;
- next = &au_sbi(sb)->si_wbr_rr_next;
- bend = au_sbend(sb);
- nbr = bend + 1;
- for (bindex = 0; bindex <= bend; bindex++) {
- if (!au_ftest_wbr(flags, DIR)) {
- err = atomic_dec_return(next) + 1;
- /* modulo for 0 is meaningless */
- if (unlikely(!err))
- err = atomic_dec_return(next) + 1;
- } else
- err = atomic_read(next);
- AuDbg("%d\n", err);
- u = err;
- err = u % nbr;
- AuDbg("%d\n", err);
- if (!au_br_rdonly(au_sbr(sb, err)))
- break;
- err = -EROFS;
- }
-
- if (err >= 0)
- err = au_wbr_nonopq(dentry, err);
-
-out:
- AuDbg("%d\n", err);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* most free space */
-static void au_mfs(struct dentry *dentry, struct dentry *parent)
-{
- struct super_block *sb;
- struct au_branch *br;
- struct au_wbr_mfs *mfs;
- struct dentry *h_parent;
- aufs_bindex_t bindex, bend;
- int err;
- unsigned long long b, bavail;
- struct path h_path;
- /* reduce the stack usage */
- struct kstatfs *st;
-
- st = kmalloc(sizeof(*st), GFP_NOFS);
- if (unlikely(!st)) {
- AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM);
- return;
- }
-
- bavail = 0;
- sb = dentry->d_sb;
- mfs = &au_sbi(sb)->si_wbr_mfs;
- MtxMustLock(&mfs->mfs_lock);
- mfs->mfs_bindex = -EROFS;
- mfs->mfsrr_bytes = 0;
- if (!parent) {
- bindex = 0;
- bend = au_sbend(sb);
- } else {
- bindex = au_dbstart(parent);
- bend = au_dbtaildir(parent);
- }
-
- for (; bindex <= bend; bindex++) {
- if (parent) {
- h_parent = au_h_dptr(parent, bindex);
- if (!h_parent || d_is_negative(h_parent))
- continue;
- }
- br = au_sbr(sb, bindex);
- if (au_br_rdonly(br))
- continue;
-
- /* sb->s_root for NFS is unreliable */
- h_path.mnt = au_br_mnt(br);
- h_path.dentry = h_path.mnt->mnt_root;
- err = vfs_statfs(&h_path, st);
- if (unlikely(err)) {
- AuWarn1("failed statfs, b%d, %d\n", bindex, err);
- continue;
- }
-
- /* when the available size is equal, select the lower one */
- BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail)
- || sizeof(b) < sizeof(st->f_bsize));
- b = st->f_bavail * st->f_bsize;
- br->br_wbr->wbr_bytes = b;
- if (b >= bavail) {
- bavail = b;
- mfs->mfs_bindex = bindex;
- mfs->mfs_jiffy = jiffies;
- }
- }
-
- mfs->mfsrr_bytes = bavail;
- AuDbg("b%d\n", mfs->mfs_bindex);
- kfree(st);
-}
-
-static int au_wbr_create_mfs(struct dentry *dentry, unsigned int flags)
-{
- int err;
- struct dentry *parent;
- struct super_block *sb;
- struct au_wbr_mfs *mfs;
-
- err = au_wbr_create_exp(dentry);
- if (err >= 0)
- goto out;
-
- sb = dentry->d_sb;
- parent = NULL;
- if (au_ftest_wbr(flags, PARENT))
- parent = dget_parent(dentry);
- mfs = &au_sbi(sb)->si_wbr_mfs;
- mutex_lock(&mfs->mfs_lock);
- if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
- || mfs->mfs_bindex < 0
- || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex)))
- au_mfs(dentry, parent);
- mutex_unlock(&mfs->mfs_lock);
- err = mfs->mfs_bindex;
- dput(parent);
-
- if (err >= 0)
- err = au_wbr_nonopq(dentry, err);
-
-out:
- AuDbg("b%d\n", err);
- return err;
-}
-
-static int au_wbr_create_init_mfs(struct super_block *sb)
-{
- struct au_wbr_mfs *mfs;
-
- mfs = &au_sbi(sb)->si_wbr_mfs;
- mutex_init(&mfs->mfs_lock);
- mfs->mfs_jiffy = 0;
- mfs->mfs_bindex = -EROFS;
-
- return 0;
-}
-
-static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused)
-{
- mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock);
- return 0;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* most free space and then round robin */
-static int au_wbr_create_mfsrr(struct dentry *dentry, unsigned int flags)
-{
- int err;
- struct au_wbr_mfs *mfs;
-
- err = au_wbr_create_mfs(dentry, flags);
- if (err >= 0) {
- mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs;
- mutex_lock(&mfs->mfs_lock);
- if (mfs->mfsrr_bytes < mfs->mfsrr_watermark)
- err = au_wbr_create_rr(dentry, flags);
- mutex_unlock(&mfs->mfs_lock);
- }
-
- AuDbg("b%d\n", err);
- return err;
-}
-
-static int au_wbr_create_init_mfsrr(struct super_block *sb)
-{
- int err;
-
- au_wbr_create_init_mfs(sb); /* ignore */
- err = au_wbr_create_init_rr(sb);
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* top down parent and most free space */
-static int au_wbr_create_pmfs(struct dentry *dentry, unsigned int flags)
-{
- int err, e2;
- unsigned long long b;
- aufs_bindex_t bindex, bstart, bend;
- struct super_block *sb;
- struct dentry *parent, *h_parent;
- struct au_branch *br;
-
- err = au_wbr_create_tdp(dentry, flags);
- if (unlikely(err < 0))
- goto out;
- parent = dget_parent(dentry);
- bstart = au_dbstart(parent);
- bend = au_dbtaildir(parent);
- if (bstart == bend)
- goto out_parent; /* success */
-
- e2 = au_wbr_create_mfs(dentry, flags);
- if (e2 < 0)
- goto out_parent; /* success */
-
- /* when the available size is equal, select upper one */
- sb = dentry->d_sb;
- br = au_sbr(sb, err);
- b = br->br_wbr->wbr_bytes;
- AuDbg("b%d, %llu\n", err, b);
-
- for (bindex = bstart; bindex <= bend; bindex++) {
- h_parent = au_h_dptr(parent, bindex);
- if (!h_parent || d_is_negative(h_parent))
- continue;
-
- br = au_sbr(sb, bindex);
- if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) {
- b = br->br_wbr->wbr_bytes;
- err = bindex;
- AuDbg("b%d, %llu\n", err, b);
- }
- }
-
- if (err >= 0)
- err = au_wbr_nonopq(dentry, err);
-
-out_parent:
- dput(parent);
-out:
- AuDbg("b%d\n", err);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * - top down parent
- * - most free space with parent
- * - most free space round-robin regardless parent
- */
-static int au_wbr_create_pmfsrr(struct dentry *dentry, unsigned int flags)
-{
- int err;
- unsigned long long watermark;
- struct super_block *sb;
- struct au_branch *br;
- struct au_wbr_mfs *mfs;
-
- err = au_wbr_create_pmfs(dentry, flags | AuWbr_PARENT);
- if (unlikely(err < 0))
- goto out;
-
- sb = dentry->d_sb;
- br = au_sbr(sb, err);
- mfs = &au_sbi(sb)->si_wbr_mfs;
- mutex_lock(&mfs->mfs_lock);
- watermark = mfs->mfsrr_watermark;
- mutex_unlock(&mfs->mfs_lock);
- if (br->br_wbr->wbr_bytes < watermark)
- /* regardless the parent dir */
- err = au_wbr_create_mfsrr(dentry, flags);
-
-out:
- AuDbg("b%d\n", err);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* policies for copyup */
-
-/* top down parent */
-static int au_wbr_copyup_tdp(struct dentry *dentry)
-{
- return au_wbr_create_tdp(dentry, /*flags, anything is ok*/0);
-}
-
-/* bottom up parent */
-static int au_wbr_copyup_bup(struct dentry *dentry)
-{
- int err;
- aufs_bindex_t bindex, bstart;
- struct dentry *parent, *h_parent;
- struct super_block *sb;
-
- err = -EROFS;
- sb = dentry->d_sb;
- parent = dget_parent(dentry);
- bstart = au_dbstart(parent);
- for (bindex = au_dbstart(dentry); bindex >= bstart; bindex--) {
- h_parent = au_h_dptr(parent, bindex);
- if (!h_parent || d_is_negative(h_parent))
- continue;
-
- if (!au_br_rdonly(au_sbr(sb, bindex))) {
- err = bindex;
- break;
- }
- }
- dput(parent);
-
- /* bottom up here */
- if (unlikely(err < 0))
- err = au_wbr_bu(sb, bstart - 1);
-
- AuDbg("b%d\n", err);
- return err;
-}
-
-/* bottom up */
-int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t bstart)
-{
- int err;
-
- err = au_wbr_bu(dentry->d_sb, bstart);
- AuDbg("b%d\n", err);
- if (err > bstart)
- err = au_wbr_nonopq(dentry, err);
-
- AuDbg("b%d\n", err);
- return err;
-}
-
-static int au_wbr_copyup_bu(struct dentry *dentry)
-{
- int err;
- aufs_bindex_t bstart;
-
- bstart = au_dbstart(dentry);
- err = au_wbr_do_copyup_bu(dentry, bstart);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-struct au_wbr_copyup_operations au_wbr_copyup_ops[] = {
- [AuWbrCopyup_TDP] = {
- .copyup = au_wbr_copyup_tdp
- },
- [AuWbrCopyup_BUP] = {
- .copyup = au_wbr_copyup_bup
- },
- [AuWbrCopyup_BU] = {
- .copyup = au_wbr_copyup_bu
- }
-};
-
-struct au_wbr_create_operations au_wbr_create_ops[] = {
- [AuWbrCreate_TDP] = {
- .create = au_wbr_create_tdp
- },
- [AuWbrCreate_RR] = {
- .create = au_wbr_create_rr,
- .init = au_wbr_create_init_rr
- },
- [AuWbrCreate_MFS] = {
- .create = au_wbr_create_mfs,
- .init = au_wbr_create_init_mfs,
- .fin = au_wbr_create_fin_mfs
- },
- [AuWbrCreate_MFSV] = {
- .create = au_wbr_create_mfs,
- .init = au_wbr_create_init_mfs,
- .fin = au_wbr_create_fin_mfs
- },
- [AuWbrCreate_MFSRR] = {
- .create = au_wbr_create_mfsrr,
- .init = au_wbr_create_init_mfsrr,
- .fin = au_wbr_create_fin_mfs
- },
- [AuWbrCreate_MFSRRV] = {
- .create = au_wbr_create_mfsrr,
- .init = au_wbr_create_init_mfsrr,
- .fin = au_wbr_create_fin_mfs
- },
- [AuWbrCreate_PMFS] = {
- .create = au_wbr_create_pmfs,
- .init = au_wbr_create_init_mfs,
- .fin = au_wbr_create_fin_mfs
- },
- [AuWbrCreate_PMFSV] = {
- .create = au_wbr_create_pmfs,
- .init = au_wbr_create_init_mfs,
- .fin = au_wbr_create_fin_mfs
- },
- [AuWbrCreate_PMFSRR] = {
- .create = au_wbr_create_pmfsrr,
- .init = au_wbr_create_init_mfsrr,
- .fin = au_wbr_create_fin_mfs
- },
- [AuWbrCreate_PMFSRRV] = {
- .create = au_wbr_create_pmfsrr,
- .init = au_wbr_create_init_mfsrr,
- .fin = au_wbr_create_fin_mfs
- }
-};
diff --git a/fs/aufs/whout.c b/fs/aufs/whout.c
deleted file mode 100644
index 04eb9af2b..000000000
--- a/fs/aufs/whout.c
+++ /dev/null
@@ -1,1047 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * whiteout for logical deletion and opaque directory
- */
-
-#include "aufs.h"
-
-#define WH_MASK S_IRUGO
-
-/*
- * If a directory contains this file, then it is opaque. We start with the
- * .wh. flag so that it is blocked by lookup.
- */
-static struct qstr diropq_name = QSTR_INIT(AUFS_WH_DIROPQ,
- sizeof(AUFS_WH_DIROPQ) - 1);
-
-/*
- * generate whiteout name, which is NOT terminated by NULL.
- * @name: original d_name.name
- * @len: original d_name.len
- * @wh: whiteout qstr
- * returns zero when succeeds, otherwise error.
- * succeeded value as wh->name should be freed by kfree().
- */
-int au_wh_name_alloc(struct qstr *wh, const struct qstr *name)
-{
- char *p;
-
- if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN))
- return -ENAMETOOLONG;
-
- wh->len = name->len + AUFS_WH_PFX_LEN;
- p = kmalloc(wh->len, GFP_NOFS);
- wh->name = p;
- if (p) {
- memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
- memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len);
- /* smp_mb(); */
- return 0;
- }
- return -ENOMEM;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * test if the @wh_name exists under @h_parent.
- * @try_sio specifies the necessary of super-io.
- */
-int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio)
-{
- int err;
- struct dentry *wh_dentry;
-
- if (!try_sio)
- wh_dentry = vfsub_lkup_one(wh_name, h_parent);
- else
- wh_dentry = au_sio_lkup_one(wh_name, h_parent);
- err = PTR_ERR(wh_dentry);
- if (IS_ERR(wh_dentry)) {
- if (err == -ENAMETOOLONG)
- err = 0;
- goto out;
- }
-
- err = 0;
- if (d_is_negative(wh_dentry))
- goto out_wh; /* success */
-
- err = 1;
- if (d_is_reg(wh_dentry))
- goto out_wh; /* success */
-
- err = -EIO;
- AuIOErr("%pd Invalid whiteout entry type 0%o.\n",
- wh_dentry, d_inode(wh_dentry)->i_mode);
-
-out_wh:
- dput(wh_dentry);
-out:
- return err;
-}
-
-/*
- * test if the @h_dentry sets opaque or not.
- */
-int au_diropq_test(struct dentry *h_dentry)
-{
- int err;
- struct inode *h_dir;
-
- h_dir = d_inode(h_dentry);
- err = au_wh_test(h_dentry, &diropq_name,
- au_test_h_perm_sio(h_dir, MAY_EXEC));
- return err;
-}
-
-/*
- * returns a negative dentry whose name is unique and temporary.
- */
-struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
- struct qstr *prefix)
-{
- struct dentry *dentry;
- int i;
- char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1],
- *name, *p;
- /* strict atomic_t is unnecessary here */
- static unsigned short cnt;
- struct qstr qs;
-
- BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN);
-
- name = defname;
- qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1;
- if (unlikely(prefix->len > DNAME_INLINE_LEN)) {
- dentry = ERR_PTR(-ENAMETOOLONG);
- if (unlikely(qs.len > NAME_MAX))
- goto out;
- dentry = ERR_PTR(-ENOMEM);
- name = kmalloc(qs.len + 1, GFP_NOFS);
- if (unlikely(!name))
- goto out;
- }
-
- /* doubly whiteout-ed */
- memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2);
- p = name + AUFS_WH_PFX_LEN * 2;
- memcpy(p, prefix->name, prefix->len);
- p += prefix->len;
- *p++ = '.';
- AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN);
-
- qs.name = name;
- for (i = 0; i < 3; i++) {
- sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++);
- dentry = au_sio_lkup_one(&qs, h_parent);
- if (IS_ERR(dentry) || d_is_negative(dentry))
- goto out_name;
- dput(dentry);
- }
- /* pr_warn("could not get random name\n"); */
- dentry = ERR_PTR(-EEXIST);
- AuDbg("%.*s\n", AuLNPair(&qs));
- BUG();
-
-out_name:
- if (name != defname)
- kfree(name);
-out:
- AuTraceErrPtr(dentry);
- return dentry;
-}
-
-/*
- * rename the @h_dentry on @br to the whiteouted temporary name.
- */
-int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
-{
- int err;
- struct path h_path = {
- .mnt = au_br_mnt(br)
- };
- struct inode *h_dir, *delegated;
- struct dentry *h_parent;
-
- h_parent = h_dentry->d_parent; /* dir inode is locked */
- h_dir = d_inode(h_parent);
- IMustLock(h_dir);
-
- h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name);
- err = PTR_ERR(h_path.dentry);
- if (IS_ERR(h_path.dentry))
- goto out;
-
- /* under the same dir, no need to lock_rename() */
- delegated = NULL;
- err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path, &delegated);
- AuTraceErr(err);
- if (unlikely(err == -EWOULDBLOCK)) {
- pr_warn("cannot retry for NFSv4 delegation"
- " for an internal rename\n");
- iput(delegated);
- }
- dput(h_path.dentry);
-
-out:
- AuTraceErr(err);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-/*
- * functions for removing a whiteout
- */
-
-static int do_unlink_wh(struct inode *h_dir, struct path *h_path)
-{
- int err, force;
- struct inode *delegated;
-
- /*
- * forces superio when the dir has a sticky bit.
- * this may be a violation of unix fs semantics.
- */
- force = (h_dir->i_mode & S_ISVTX)
- && !uid_eq(current_fsuid(), d_inode(h_path->dentry)->i_uid);
- delegated = NULL;
- err = vfsub_unlink(h_dir, h_path, &delegated, force);
- if (unlikely(err == -EWOULDBLOCK)) {
- pr_warn("cannot retry for NFSv4 delegation"
- " for an internal unlink\n");
- iput(delegated);
- }
- return err;
-}
-
-int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
- struct dentry *dentry)
-{
- int err;
-
- err = do_unlink_wh(h_dir, h_path);
- if (!err && dentry)
- au_set_dbwh(dentry, -1);
-
- return err;
-}
-
-static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh,
- struct au_branch *br)
-{
- int err;
- struct path h_path = {
- .mnt = au_br_mnt(br)
- };
-
- err = 0;
- h_path.dentry = vfsub_lkup_one(wh, h_parent);
- if (IS_ERR(h_path.dentry))
- err = PTR_ERR(h_path.dentry);
- else {
- if (d_is_reg(h_path.dentry))
- err = do_unlink_wh(d_inode(h_parent), &h_path);
- dput(h_path.dentry);
- }
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-/*
- * initialize/clean whiteout for a branch
- */
-
-static void au_wh_clean(struct inode *h_dir, struct path *whpath,
- const int isdir)
-{
- int err;
- struct inode *delegated;
-
- if (d_is_negative(whpath->dentry))
- return;
-
- if (isdir)
- err = vfsub_rmdir(h_dir, whpath);
- else {
- delegated = NULL;
- err = vfsub_unlink(h_dir, whpath, &delegated, /*force*/0);
- if (unlikely(err == -EWOULDBLOCK)) {
- pr_warn("cannot retry for NFSv4 delegation"
- " for an internal unlink\n");
- iput(delegated);
- }
- }
- if (unlikely(err))
- pr_warn("failed removing %pd (%d), ignored.\n",
- whpath->dentry, err);
-}
-
-static int test_linkable(struct dentry *h_root)
-{
- struct inode *h_dir = d_inode(h_root);
-
- if (h_dir->i_op->link)
- return 0;
-
- pr_err("%pd (%s) doesn't support link(2), use noplink and rw+nolwh\n",
- h_root, au_sbtype(h_root->d_sb));
- return -ENOSYS;
-}
-
-/* todo: should this mkdir be done in /sbin/mount.aufs helper? */
-static int au_whdir(struct inode *h_dir, struct path *path)
-{
- int err;
-
- err = -EEXIST;
- if (d_is_negative(path->dentry)) {
- int mode = S_IRWXU;
-
- if (au_test_nfs(path->dentry->d_sb))
- mode |= S_IXUGO;
- err = vfsub_mkdir(h_dir, path, mode);
- } else if (d_is_dir(path->dentry))
- err = 0;
- else
- pr_err("unknown %pd exists\n", path->dentry);
-
- return err;
-}
-
-struct au_wh_base {
- const struct qstr *name;
- struct dentry *dentry;
-};
-
-static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[],
- struct path *h_path)
-{
- h_path->dentry = base[AuBrWh_BASE].dentry;
- au_wh_clean(h_dir, h_path, /*isdir*/0);
- h_path->dentry = base[AuBrWh_PLINK].dentry;
- au_wh_clean(h_dir, h_path, /*isdir*/1);
- h_path->dentry = base[AuBrWh_ORPH].dentry;
- au_wh_clean(h_dir, h_path, /*isdir*/1);
-}
-
-/*
- * returns tri-state,
- * minus: error, caller should print the message
- * zero: succuess
- * plus: error, caller should NOT print the message
- */
-static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr,
- int do_plink, struct au_wh_base base[],
- struct path *h_path)
-{
- int err;
- struct inode *h_dir;
-
- h_dir = d_inode(h_root);
- h_path->dentry = base[AuBrWh_BASE].dentry;
- au_wh_clean(h_dir, h_path, /*isdir*/0);
- h_path->dentry = base[AuBrWh_PLINK].dentry;
- if (do_plink) {
- err = test_linkable(h_root);
- if (unlikely(err)) {
- err = 1;
- goto out;
- }
-
- err = au_whdir(h_dir, h_path);
- if (unlikely(err))
- goto out;
- wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
- } else
- au_wh_clean(h_dir, h_path, /*isdir*/1);
- h_path->dentry = base[AuBrWh_ORPH].dentry;
- err = au_whdir(h_dir, h_path);
- if (unlikely(err))
- goto out;
- wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
-
-out:
- return err;
-}
-
-/*
- * for the moment, aufs supports the branch filesystem which does not support
- * link(2). testing on FAT which does not support i_op->setattr() fully either,
- * copyup failed. finally, such filesystem will not be used as the writable
- * branch.
- *
- * returns tri-state, see above.
- */
-static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr,
- int do_plink, struct au_wh_base base[],
- struct path *h_path)
-{
- int err;
- struct inode *h_dir;
-
- WbrWhMustWriteLock(wbr);
-
- err = test_linkable(h_root);
- if (unlikely(err)) {
- err = 1;
- goto out;
- }
-
- /*
- * todo: should this create be done in /sbin/mount.aufs helper?
- */
- err = -EEXIST;
- h_dir = d_inode(h_root);
- if (d_is_negative(base[AuBrWh_BASE].dentry)) {
- h_path->dentry = base[AuBrWh_BASE].dentry;
- err = vfsub_create(h_dir, h_path, WH_MASK, /*want_excl*/true);
- } else if (d_is_reg(base[AuBrWh_BASE].dentry))
- err = 0;
- else
- pr_err("unknown %pd2 exists\n", base[AuBrWh_BASE].dentry);
- if (unlikely(err))
- goto out;
-
- h_path->dentry = base[AuBrWh_PLINK].dentry;
- if (do_plink) {
- err = au_whdir(h_dir, h_path);
- if (unlikely(err))
- goto out;
- wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
- } else
- au_wh_clean(h_dir, h_path, /*isdir*/1);
- wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry);
-
- h_path->dentry = base[AuBrWh_ORPH].dentry;
- err = au_whdir(h_dir, h_path);
- if (unlikely(err))
- goto out;
- wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
-
-out:
- return err;
-}
-
-/*
- * initialize the whiteout base file/dir for @br.
- */
-int au_wh_init(struct au_branch *br, struct super_block *sb)
-{
- int err, i;
- const unsigned char do_plink
- = !!au_opt_test(au_mntflags(sb), PLINK);
- struct inode *h_dir;
- struct path path = br->br_path;
- struct dentry *h_root = path.dentry;
- struct au_wbr *wbr = br->br_wbr;
- static const struct qstr base_name[] = {
- [AuBrWh_BASE] = QSTR_INIT(AUFS_BASE_NAME,
- sizeof(AUFS_BASE_NAME) - 1),
- [AuBrWh_PLINK] = QSTR_INIT(AUFS_PLINKDIR_NAME,
- sizeof(AUFS_PLINKDIR_NAME) - 1),
- [AuBrWh_ORPH] = QSTR_INIT(AUFS_ORPHDIR_NAME,
- sizeof(AUFS_ORPHDIR_NAME) - 1)
- };
- struct au_wh_base base[] = {
- [AuBrWh_BASE] = {
- .name = base_name + AuBrWh_BASE,
- .dentry = NULL
- },
- [AuBrWh_PLINK] = {
- .name = base_name + AuBrWh_PLINK,
- .dentry = NULL
- },
- [AuBrWh_ORPH] = {
- .name = base_name + AuBrWh_ORPH,
- .dentry = NULL
- }
- };
-
- if (wbr)
- WbrWhMustWriteLock(wbr);
-
- for (i = 0; i < AuBrWh_Last; i++) {
- /* doubly whiteouted */
- struct dentry *d;
-
- d = au_wh_lkup(h_root, (void *)base[i].name, br);
- err = PTR_ERR(d);
- if (IS_ERR(d))
- goto out;
-
- base[i].dentry = d;
- AuDebugOn(wbr
- && wbr->wbr_wh[i]
- && wbr->wbr_wh[i] != base[i].dentry);
- }
-
- if (wbr)
- for (i = 0; i < AuBrWh_Last; i++) {
- dput(wbr->wbr_wh[i]);
- wbr->wbr_wh[i] = NULL;
- }
-
- err = 0;
- if (!au_br_writable(br->br_perm)) {
- h_dir = d_inode(h_root);
- au_wh_init_ro(h_dir, base, &path);
- } else if (!au_br_wh_linkable(br->br_perm)) {
- err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path);
- if (err > 0)
- goto out;
- else if (err)
- goto out_err;
- } else {
- err = au_wh_init_rw(h_root, wbr, do_plink, base, &path);
- if (err > 0)
- goto out;
- else if (err)
- goto out_err;
- }
- goto out; /* success */
-
-out_err:
- pr_err("an error(%d) on the writable branch %pd(%s)\n",
- err, h_root, au_sbtype(h_root->d_sb));
-out:
- for (i = 0; i < AuBrWh_Last; i++)
- dput(base[i].dentry);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-/*
- * whiteouts are all hard-linked usually.
- * when its link count reaches a ceiling, we create a new whiteout base
- * asynchronously.
- */
-
-struct reinit_br_wh {
- struct super_block *sb;
- struct au_branch *br;
-};
-
-static void reinit_br_wh(void *arg)
-{
- int err;
- aufs_bindex_t bindex;
- struct path h_path;
- struct reinit_br_wh *a = arg;
- struct au_wbr *wbr;
- struct inode *dir, *delegated;
- struct dentry *h_root;
- struct au_hinode *hdir;
-
- err = 0;
- wbr = a->br->br_wbr;
- /* big aufs lock */
- si_noflush_write_lock(a->sb);
- if (!au_br_writable(a->br->br_perm))
- goto out;
- bindex = au_br_index(a->sb, a->br->br_id);
- if (unlikely(bindex < 0))
- goto out;
-
- di_read_lock_parent(a->sb->s_root, AuLock_IR);
- dir = d_inode(a->sb->s_root);
- hdir = au_hi(dir, bindex);
- h_root = au_h_dptr(a->sb->s_root, bindex);
- AuDebugOn(h_root != au_br_dentry(a->br));
-
- au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
- wbr_wh_write_lock(wbr);
- err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode,
- h_root, a->br);
- if (!err) {
- h_path.dentry = wbr->wbr_whbase;
- h_path.mnt = au_br_mnt(a->br);
- delegated = NULL;
- err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated,
- /*force*/0);
- if (unlikely(err == -EWOULDBLOCK)) {
- pr_warn("cannot retry for NFSv4 delegation"
- " for an internal unlink\n");
- iput(delegated);
- }
- } else {
- pr_warn("%pd is moved, ignored\n", wbr->wbr_whbase);
- err = 0;
- }
- dput(wbr->wbr_whbase);
- wbr->wbr_whbase = NULL;
- if (!err)
- err = au_wh_init(a->br, a->sb);
- wbr_wh_write_unlock(wbr);
- au_hn_imtx_unlock(hdir);
- di_read_unlock(a->sb->s_root, AuLock_IR);
- if (!err)
- au_fhsm_wrote(a->sb, bindex, /*force*/0);
-
-out:
- if (wbr)
- atomic_dec(&wbr->wbr_wh_running);
- atomic_dec(&a->br->br_count);
- si_write_unlock(a->sb);
- au_nwt_done(&au_sbi(a->sb)->si_nowait);
- kfree(arg);
- if (unlikely(err))
- AuIOErr("err %d\n", err);
-}
-
-static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
-{
- int do_dec, wkq_err;
- struct reinit_br_wh *arg;
-
- do_dec = 1;
- if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1)
- goto out;
-
- /* ignore ENOMEM */
- arg = kmalloc(sizeof(*arg), GFP_NOFS);
- if (arg) {
- /*
- * dec(wh_running), kfree(arg) and dec(br_count)
- * in reinit function
- */
- arg->sb = sb;
- arg->br = br;
- atomic_inc(&br->br_count);
- wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0);
- if (unlikely(wkq_err)) {
- atomic_dec(&br->br_wbr->wbr_wh_running);
- atomic_dec(&br->br_count);
- kfree(arg);
- }
- do_dec = 0;
- }
-
-out:
- if (do_dec)
- atomic_dec(&br->br_wbr->wbr_wh_running);
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * create the whiteout @wh.
- */
-static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex,
- struct dentry *wh)
-{
- int err;
- struct path h_path = {
- .dentry = wh
- };
- struct au_branch *br;
- struct au_wbr *wbr;
- struct dentry *h_parent;
- struct inode *h_dir, *delegated;
-
- h_parent = wh->d_parent; /* dir inode is locked */
- h_dir = d_inode(h_parent);
- IMustLock(h_dir);
-
- br = au_sbr(sb, bindex);
- h_path.mnt = au_br_mnt(br);
- wbr = br->br_wbr;
- wbr_wh_read_lock(wbr);
- if (wbr->wbr_whbase) {
- delegated = NULL;
- err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path, &delegated);
- if (unlikely(err == -EWOULDBLOCK)) {
- pr_warn("cannot retry for NFSv4 delegation"
- " for an internal link\n");
- iput(delegated);
- }
- if (!err || err != -EMLINK)
- goto out;
-
- /* link count full. re-initialize br_whbase. */
- kick_reinit_br_wh(sb, br);
- }
-
- /* return this error in this context */
- err = vfsub_create(h_dir, &h_path, WH_MASK, /*want_excl*/true);
- if (!err)
- au_fhsm_wrote(sb, bindex, /*force*/0);
-
-out:
- wbr_wh_read_unlock(wbr);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * create or remove the diropq.
- */
-static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
- unsigned int flags)
-{
- struct dentry *opq_dentry, *h_dentry;
- struct super_block *sb;
- struct au_branch *br;
- int err;
-
- sb = dentry->d_sb;
- br = au_sbr(sb, bindex);
- h_dentry = au_h_dptr(dentry, bindex);
- opq_dentry = vfsub_lkup_one(&diropq_name, h_dentry);
- if (IS_ERR(opq_dentry))
- goto out;
-
- if (au_ftest_diropq(flags, CREATE)) {
- err = link_or_create_wh(sb, bindex, opq_dentry);
- if (!err) {
- au_set_dbdiropq(dentry, bindex);
- goto out; /* success */
- }
- } else {
- struct path tmp = {
- .dentry = opq_dentry,
- .mnt = au_br_mnt(br)
- };
- err = do_unlink_wh(au_h_iptr(d_inode(dentry), bindex), &tmp);
- if (!err)
- au_set_dbdiropq(dentry, -1);
- }
- dput(opq_dentry);
- opq_dentry = ERR_PTR(err);
-
-out:
- return opq_dentry;
-}
-
-struct do_diropq_args {
- struct dentry **errp;
- struct dentry *dentry;
- aufs_bindex_t bindex;
- unsigned int flags;
-};
-
-static void call_do_diropq(void *args)
-{
- struct do_diropq_args *a = args;
- *a->errp = do_diropq(a->dentry, a->bindex, a->flags);
-}
-
-struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
- unsigned int flags)
-{
- struct dentry *diropq, *h_dentry;
-
- h_dentry = au_h_dptr(dentry, bindex);
- if (!au_test_h_perm_sio(d_inode(h_dentry), MAY_EXEC | MAY_WRITE))
- diropq = do_diropq(dentry, bindex, flags);
- else {
- int wkq_err;
- struct do_diropq_args args = {
- .errp = &diropq,
- .dentry = dentry,
- .bindex = bindex,
- .flags = flags
- };
-
- wkq_err = au_wkq_wait(call_do_diropq, &args);
- if (unlikely(wkq_err))
- diropq = ERR_PTR(wkq_err);
- }
-
- return diropq;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * lookup whiteout dentry.
- * @h_parent: lower parent dentry which must exist and be locked
- * @base_name: name of dentry which will be whiteouted
- * returns dentry for whiteout.
- */
-struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
- struct au_branch *br)
-{
- int err;
- struct qstr wh_name;
- struct dentry *wh_dentry;
-
- err = au_wh_name_alloc(&wh_name, base_name);
- wh_dentry = ERR_PTR(err);
- if (!err) {
- wh_dentry = vfsub_lkup_one(&wh_name, h_parent);
- kfree(wh_name.name);
- }
- return wh_dentry;
-}
-
-/*
- * link/create a whiteout for @dentry on @bindex.
- */
-struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
- struct dentry *h_parent)
-{
- struct dentry *wh_dentry;
- struct super_block *sb;
- int err;
-
- sb = dentry->d_sb;
- wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex));
- if (!IS_ERR(wh_dentry) && d_is_negative(wh_dentry)) {
- err = link_or_create_wh(sb, bindex, wh_dentry);
- if (!err) {
- au_set_dbwh(dentry, bindex);
- au_fhsm_wrote(sb, bindex, /*force*/0);
- } else {
- dput(wh_dentry);
- wh_dentry = ERR_PTR(err);
- }
- }
-
- return wh_dentry;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* Delete all whiteouts in this directory on branch bindex. */
-static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist,
- aufs_bindex_t bindex, struct au_branch *br)
-{
- int err;
- unsigned long ul, n;
- struct qstr wh_name;
- char *p;
- struct hlist_head *head;
- struct au_vdir_wh *pos;
- struct au_vdir_destr *str;
-
- err = -ENOMEM;
- p = (void *)__get_free_page(GFP_NOFS);
- wh_name.name = p;
- if (unlikely(!wh_name.name))
- goto out;
-
- err = 0;
- memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
- p += AUFS_WH_PFX_LEN;
- n = whlist->nh_num;
- head = whlist->nh_head;
- for (ul = 0; !err && ul < n; ul++, head++) {
- hlist_for_each_entry(pos, head, wh_hash) {
- if (pos->wh_bindex != bindex)
- continue;
-
- str = &pos->wh_str;
- if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) {
- memcpy(p, str->name, str->len);
- wh_name.len = AUFS_WH_PFX_LEN + str->len;
- err = unlink_wh_name(h_dentry, &wh_name, br);
- if (!err)
- continue;
- break;
- }
- AuIOErr("whiteout name too long %.*s\n",
- str->len, str->name);
- err = -EIO;
- break;
- }
- }
- free_page((unsigned long)wh_name.name);
-
-out:
- return err;
-}
-
-struct del_wh_children_args {
- int *errp;
- struct dentry *h_dentry;
- struct au_nhash *whlist;
- aufs_bindex_t bindex;
- struct au_branch *br;
-};
-
-static void call_del_wh_children(void *args)
-{
- struct del_wh_children_args *a = args;
- *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br);
-}
-
-/* ---------------------------------------------------------------------- */
-
-struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
-{
- struct au_whtmp_rmdir *whtmp;
- int err;
- unsigned int rdhash;
-
- SiMustAnyLock(sb);
-
- whtmp = kzalloc(sizeof(*whtmp), gfp);
- if (unlikely(!whtmp)) {
- whtmp = ERR_PTR(-ENOMEM);
- goto out;
- }
-
- /* no estimation for dir size */
- rdhash = au_sbi(sb)->si_rdhash;
- if (!rdhash)
- rdhash = AUFS_RDHASH_DEF;
- err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
- if (unlikely(err)) {
- kfree(whtmp);
- whtmp = ERR_PTR(err);
- }
-
-out:
- return whtmp;
-}
-
-void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
-{
- if (whtmp->br)
- atomic_dec(&whtmp->br->br_count);
- dput(whtmp->wh_dentry);
- iput(whtmp->dir);
- au_nhash_wh_free(&whtmp->whlist);
- kfree(whtmp);
-}
-
-/*
- * rmdir the whiteouted temporary named dir @h_dentry.
- * @whlist: whiteouted children.
- */
-int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
- struct dentry *wh_dentry, struct au_nhash *whlist)
-{
- int err;
- unsigned int h_nlink;
- struct path h_tmp;
- struct inode *wh_inode, *h_dir;
- struct au_branch *br;
-
- h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
- IMustLock(h_dir);
-
- br = au_sbr(dir->i_sb, bindex);
- wh_inode = d_inode(wh_dentry);
- mutex_lock_nested(&wh_inode->i_mutex, AuLsc_I_CHILD);
-
- /*
- * someone else might change some whiteouts while we were sleeping.
- * it means this whlist may have an obsoleted entry.
- */
- if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE))
- err = del_wh_children(wh_dentry, whlist, bindex, br);
- else {
- int wkq_err;
- struct del_wh_children_args args = {
- .errp = &err,
- .h_dentry = wh_dentry,
- .whlist = whlist,
- .bindex = bindex,
- .br = br
- };
-
- wkq_err = au_wkq_wait(call_del_wh_children, &args);
- if (unlikely(wkq_err))
- err = wkq_err;
- }
- mutex_unlock(&wh_inode->i_mutex);
-
- if (!err) {
- h_tmp.dentry = wh_dentry;
- h_tmp.mnt = au_br_mnt(br);
- h_nlink = h_dir->i_nlink;
- err = vfsub_rmdir(h_dir, &h_tmp);
- /* some fs doesn't change the parent nlink in some cases */
- h_nlink -= h_dir->i_nlink;
- }
-
- if (!err) {
- if (au_ibstart(dir) == bindex) {
- /* todo: dir->i_mutex is necessary */
- au_cpup_attr_timesizes(dir);
- if (h_nlink)
- vfsub_drop_nlink(dir);
- }
- return 0; /* success */
- }
-
- pr_warn("failed removing %pd(%d), ignored\n", wh_dentry, err);
- return err;
-}
-
-static void call_rmdir_whtmp(void *args)
-{
- int err;
- aufs_bindex_t bindex;
- struct au_whtmp_rmdir *a = args;
- struct super_block *sb;
- struct dentry *h_parent;
- struct inode *h_dir;
- struct au_hinode *hdir;
-
- /* rmdir by nfsd may cause deadlock with this i_mutex */
- /* mutex_lock(&a->dir->i_mutex); */
- err = -EROFS;
- sb = a->dir->i_sb;
- si_read_lock(sb, !AuLock_FLUSH);
- if (!au_br_writable(a->br->br_perm))
- goto out;
- bindex = au_br_index(sb, a->br->br_id);
- if (unlikely(bindex < 0))
- goto out;
-
- err = -EIO;
- ii_write_lock_parent(a->dir);
- h_parent = dget_parent(a->wh_dentry);
- h_dir = d_inode(h_parent);
- hdir = au_hi(a->dir, bindex);
- err = vfsub_mnt_want_write(au_br_mnt(a->br));
- if (unlikely(err))
- goto out_mnt;
- au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
- err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent,
- a->br);
- if (!err)
- err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry, &a->whlist);
- au_hn_imtx_unlock(hdir);
- vfsub_mnt_drop_write(au_br_mnt(a->br));
-
-out_mnt:
- dput(h_parent);
- ii_write_unlock(a->dir);
-out:
- /* mutex_unlock(&a->dir->i_mutex); */
- au_whtmp_rmdir_free(a);
- si_read_unlock(sb);
- au_nwt_done(&au_sbi(sb)->si_nowait);
- if (unlikely(err))
- AuIOErr("err %d\n", err);
-}
-
-void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
- struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
-{
- int wkq_err;
- struct super_block *sb;
-
- IMustLock(dir);
-
- /* all post-process will be done in do_rmdir_whtmp(). */
- sb = dir->i_sb;
- args->dir = au_igrab(dir);
- args->br = au_sbr(sb, bindex);
- atomic_inc(&args->br->br_count);
- args->wh_dentry = dget(wh_dentry);
- wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0);
- if (unlikely(wkq_err)) {
- pr_warn("rmdir error %pd (%d), ignored\n", wh_dentry, wkq_err);
- au_whtmp_rmdir_free(args);
- }
-}
diff --git a/fs/aufs/whout.h b/fs/aufs/whout.h
deleted file mode 100644
index 4077dd19e..000000000
--- a/fs/aufs/whout.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * whiteout for logical deletion and opaque directory
- */
-
-#ifndef __AUFS_WHOUT_H__
-#define __AUFS_WHOUT_H__
-
-#ifdef __KERNEL__
-
-#include "dir.h"
-
-/* whout.c */
-int au_wh_name_alloc(struct qstr *wh, const struct qstr *name);
-int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio);
-int au_diropq_test(struct dentry *h_dentry);
-struct au_branch;
-struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
- struct qstr *prefix);
-int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br);
-int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
- struct dentry *dentry);
-int au_wh_init(struct au_branch *br, struct super_block *sb);
-
-/* diropq flags */
-#define AuDiropq_CREATE 1
-#define au_ftest_diropq(flags, name) ((flags) & AuDiropq_##name)
-#define au_fset_diropq(flags, name) \
- do { (flags) |= AuDiropq_##name; } while (0)
-#define au_fclr_diropq(flags, name) \
- do { (flags) &= ~AuDiropq_##name; } while (0)
-
-struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
- unsigned int flags);
-struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
- struct au_branch *br);
-struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
- struct dentry *h_parent);
-
-/* real rmdir for the whiteout-ed dir */
-struct au_whtmp_rmdir {
- struct inode *dir;
- struct au_branch *br;
- struct dentry *wh_dentry;
- struct au_nhash whlist;
-};
-
-struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp);
-void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp);
-int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
- struct dentry *wh_dentry, struct au_nhash *whlist);
-void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
- struct dentry *wh_dentry, struct au_whtmp_rmdir *args);
-
-/* ---------------------------------------------------------------------- */
-
-static inline struct dentry *au_diropq_create(struct dentry *dentry,
- aufs_bindex_t bindex)
-{
- return au_diropq_sio(dentry, bindex, AuDiropq_CREATE);
-}
-
-static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex)
-{
- return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE));
-}
-
-#endif /* __KERNEL__ */
-#endif /* __AUFS_WHOUT_H__ */
diff --git a/fs/aufs/wkq.c b/fs/aufs/wkq.c
deleted file mode 100644
index 0f1500e93..000000000
--- a/fs/aufs/wkq.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * workqueue for asynchronous/super-io operations
- * todo: try new dredential scheme
- */
-
-#include <linux/module.h>
-#include "aufs.h"
-
-/* internal workqueue named AUFS_WKQ_NAME */
-
-static struct workqueue_struct *au_wkq;
-
-struct au_wkinfo {
- struct work_struct wk;
- struct kobject *kobj;
-
- unsigned int flags; /* see wkq.h */
-
- au_wkq_func_t func;
- void *args;
-
- struct completion *comp;
-};
-
-/* ---------------------------------------------------------------------- */
-
-static void wkq_func(struct work_struct *wk)
-{
- struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk);
-
- AuDebugOn(!uid_eq(current_fsuid(), GLOBAL_ROOT_UID));
- AuDebugOn(rlimit(RLIMIT_FSIZE) != RLIM_INFINITY);
-
- wkinfo->func(wkinfo->args);
- if (au_ftest_wkq(wkinfo->flags, WAIT))
- complete(wkinfo->comp);
- else {
- kobject_put(wkinfo->kobj);
- module_put(THIS_MODULE); /* todo: ?? */
- kfree(wkinfo);
- }
-}
-
-/*
- * Since struct completion is large, try allocating it dynamically.
- */
-#if 1 /* defined(CONFIG_4KSTACKS) || defined(AuTest4KSTACKS) */
-#define AuWkqCompDeclare(name) struct completion *comp = NULL
-
-static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
-{
- *comp = kmalloc(sizeof(**comp), GFP_NOFS);
- if (*comp) {
- init_completion(*comp);
- wkinfo->comp = *comp;
- return 0;
- }
- return -ENOMEM;
-}
-
-static void au_wkq_comp_free(struct completion *comp)
-{
- kfree(comp);
-}
-
-#else
-
-/* no braces */
-#define AuWkqCompDeclare(name) \
- DECLARE_COMPLETION_ONSTACK(_ ## name); \
- struct completion *comp = &_ ## name
-
-static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
-{
- wkinfo->comp = *comp;
- return 0;
-}
-
-static void au_wkq_comp_free(struct completion *comp __maybe_unused)
-{
- /* empty */
-}
-#endif /* 4KSTACKS */
-
-static void au_wkq_run(struct au_wkinfo *wkinfo)
-{
- if (au_ftest_wkq(wkinfo->flags, NEST)) {
- if (au_wkq_test()) {
- AuWarn1("wkq from wkq, unless silly-rename on NFS,"
- " due to a dead dir by UDBA?\n");
- AuDebugOn(au_ftest_wkq(wkinfo->flags, WAIT));
- }
- } else
- au_dbg_verify_kthread();
-
- if (au_ftest_wkq(wkinfo->flags, WAIT)) {
- INIT_WORK_ONSTACK(&wkinfo->wk, wkq_func);
- queue_work(au_wkq, &wkinfo->wk);
- } else {
- INIT_WORK(&wkinfo->wk, wkq_func);
- schedule_work(&wkinfo->wk);
- }
-}
-
-/*
- * Be careful. It is easy to make deadlock happen.
- * processA: lock, wkq and wait
- * processB: wkq and wait, lock in wkq
- * --> deadlock
- */
-int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args)
-{
- int err;
- AuWkqCompDeclare(comp);
- struct au_wkinfo wkinfo = {
- .flags = flags,
- .func = func,
- .args = args
- };
-
- err = au_wkq_comp_alloc(&wkinfo, &comp);
- if (!err) {
- au_wkq_run(&wkinfo);
- /* no timeout, no interrupt */
- wait_for_completion(wkinfo.comp);
- au_wkq_comp_free(comp);
- destroy_work_on_stack(&wkinfo.wk);
- }
-
- return err;
-
-}
-
-/*
- * Note: dget/dput() in func for aufs dentries are not supported. It will be a
- * problem in a concurrent umounting.
- */
-int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
- unsigned int flags)
-{
- int err;
- struct au_wkinfo *wkinfo;
-
- atomic_inc(&au_sbi(sb)->si_nowait.nw_len);
-
- /*
- * wkq_func() must free this wkinfo.
- * it highly depends upon the implementation of workqueue.
- */
- err = 0;
- wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS);
- if (wkinfo) {
- wkinfo->kobj = &au_sbi(sb)->si_kobj;
- wkinfo->flags = flags & ~AuWkq_WAIT;
- wkinfo->func = func;
- wkinfo->args = args;
- wkinfo->comp = NULL;
- kobject_get(wkinfo->kobj);
- __module_get(THIS_MODULE); /* todo: ?? */
-
- au_wkq_run(wkinfo);
- } else {
- err = -ENOMEM;
- au_nwt_done(&au_sbi(sb)->si_nowait);
- }
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void au_nwt_init(struct au_nowait_tasks *nwt)
-{
- atomic_set(&nwt->nw_len, 0);
- /* smp_mb(); */ /* atomic_set */
- init_waitqueue_head(&nwt->nw_wq);
-}
-
-void au_wkq_fin(void)
-{
- destroy_workqueue(au_wkq);
-}
-
-int __init au_wkq_init(void)
-{
- int err;
-
- err = 0;
- au_wkq = alloc_workqueue(AUFS_WKQ_NAME, 0, WQ_DFL_ACTIVE);
- if (IS_ERR(au_wkq))
- err = PTR_ERR(au_wkq);
- else if (!au_wkq)
- err = -ENOMEM;
-
- return err;
-}
diff --git a/fs/aufs/wkq.h b/fs/aufs/wkq.h
deleted file mode 100644
index f6c9b9902..000000000
--- a/fs/aufs/wkq.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * workqueue for asynchronous/super-io operations
- * todo: try new credentials management scheme
- */
-
-#ifndef __AUFS_WKQ_H__
-#define __AUFS_WKQ_H__
-
-#ifdef __KERNEL__
-
-struct super_block;
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * in the next operation, wait for the 'nowait' tasks in system-wide workqueue
- */
-struct au_nowait_tasks {
- atomic_t nw_len;
- wait_queue_head_t nw_wq;
-};
-
-/* ---------------------------------------------------------------------- */
-
-typedef void (*au_wkq_func_t)(void *args);
-
-/* wkq flags */
-#define AuWkq_WAIT 1
-#define AuWkq_NEST (1 << 1)
-#define au_ftest_wkq(flags, name) ((flags) & AuWkq_##name)
-#define au_fset_wkq(flags, name) \
- do { (flags) |= AuWkq_##name; } while (0)
-#define au_fclr_wkq(flags, name) \
- do { (flags) &= ~AuWkq_##name; } while (0)
-
-#ifndef CONFIG_AUFS_HNOTIFY
-#undef AuWkq_NEST
-#define AuWkq_NEST 0
-#endif
-
-/* wkq.c */
-int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args);
-int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
- unsigned int flags);
-void au_nwt_init(struct au_nowait_tasks *nwt);
-int __init au_wkq_init(void);
-void au_wkq_fin(void);
-
-/* ---------------------------------------------------------------------- */
-
-static inline int au_wkq_test(void)
-{
- return current->flags & PF_WQ_WORKER;
-}
-
-static inline int au_wkq_wait(au_wkq_func_t func, void *args)
-{
- return au_wkq_do_wait(AuWkq_WAIT, func, args);
-}
-
-static inline void au_nwt_done(struct au_nowait_tasks *nwt)
-{
- if (atomic_dec_and_test(&nwt->nw_len))
- wake_up_all(&nwt->nw_wq);
-}
-
-static inline int au_nwt_flush(struct au_nowait_tasks *nwt)
-{
- wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len));
- return 0;
-}
-
-#endif /* __KERNEL__ */
-#endif /* __AUFS_WKQ_H__ */
diff --git a/fs/aufs/xattr.c b/fs/aufs/xattr.c
deleted file mode 100644
index f592e05ea..000000000
--- a/fs/aufs/xattr.c
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- * Copyright (C) 2014-2016 Junjiro R. Okajima
- */
-
-/*
- * handling xattr functions
- */
-
-#include <linux/xattr.h>
-#include "aufs.h"
-
-static int au_xattr_ignore(int err, char *name, unsigned int ignore_flags)
-{
- if (!ignore_flags)
- goto out;
- switch (err) {
- case -ENOMEM:
- case -EDQUOT:
- goto out;
- }
-
- if ((ignore_flags & AuBrAttr_ICEX) == AuBrAttr_ICEX) {
- err = 0;
- goto out;
- }
-
-#define cmp(brattr, prefix) do { \
- if (!strncmp(name, XATTR_##prefix##_PREFIX, \
- XATTR_##prefix##_PREFIX_LEN)) { \
- if (ignore_flags & AuBrAttr_ICEX_##brattr) \
- err = 0; \
- goto out; \
- } \
- } while (0)
-
- cmp(SEC, SECURITY);
- cmp(SYS, SYSTEM);
- cmp(TR, TRUSTED);
- cmp(USR, USER);
-#undef cmp
-
- if (ignore_flags & AuBrAttr_ICEX_OTH)
- err = 0;
-
-out:
- return err;
-}
-
-static const int au_xattr_out_of_list = AuBrAttr_ICEX_OTH << 1;
-
-static int au_do_cpup_xattr(struct dentry *h_dst, struct dentry *h_src,
- char *name, char **buf, unsigned int ignore_flags,
- unsigned int verbose)
-{
- int err;
- ssize_t ssz;
- struct inode *h_idst;
-
- ssz = vfs_getxattr_alloc(h_src, name, buf, 0, GFP_NOFS);
- err = ssz;
- if (unlikely(err <= 0)) {
- if (err == -ENODATA
- || (err == -EOPNOTSUPP
- && ((ignore_flags & au_xattr_out_of_list)
- || (au_test_nfs_noacl(d_inode(h_src))
- && (!strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS)
- || !strcmp(name,
- XATTR_NAME_POSIX_ACL_DEFAULT))))
- ))
- err = 0;
- if (err && (verbose || au_debug_test()))
- pr_err("%s, err %d\n", name, err);
- goto out;
- }
-
- /* unlock it temporary */
- h_idst = d_inode(h_dst);
- mutex_unlock(&h_idst->i_mutex);
- err = vfsub_setxattr(h_dst, name, *buf, ssz, /*flags*/0);
- mutex_lock_nested(&h_idst->i_mutex, AuLsc_I_CHILD2);
- if (unlikely(err)) {
- if (verbose || au_debug_test())
- pr_err("%s, err %d\n", name, err);
- err = au_xattr_ignore(err, name, ignore_flags);
- }
-
-out:
- return err;
-}
-
-int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
- unsigned int verbose)
-{
- int err, unlocked, acl_access, acl_default;
- ssize_t ssz;
- struct inode *h_isrc, *h_idst;
- char *value, *p, *o, *e;
-
- /* try stopping to update the source inode while we are referencing */
- /* there should not be the parent-child relationship between them */
- h_isrc = d_inode(h_src);
- h_idst = d_inode(h_dst);
- mutex_unlock(&h_idst->i_mutex);
- mutex_lock_nested(&h_isrc->i_mutex, AuLsc_I_CHILD);
- mutex_lock_nested(&h_idst->i_mutex, AuLsc_I_CHILD2);
- unlocked = 0;
-
- /* some filesystems don't list POSIX ACL, for example tmpfs */
- ssz = vfs_listxattr(h_src, NULL, 0);
- err = ssz;
- if (unlikely(err < 0)) {
- AuTraceErr(err);
- if (err == -ENODATA
- || err == -EOPNOTSUPP)
- err = 0; /* ignore */
- goto out;
- }
-
- err = 0;
- p = NULL;
- o = NULL;
- if (ssz) {
- err = -ENOMEM;
- p = kmalloc(ssz, GFP_NOFS);
- o = p;
- if (unlikely(!p))
- goto out;
- err = vfs_listxattr(h_src, p, ssz);
- }
- mutex_unlock(&h_isrc->i_mutex);
- unlocked = 1;
- AuDbg("err %d, ssz %zd\n", err, ssz);
- if (unlikely(err < 0))
- goto out_free;
-
- err = 0;
- e = p + ssz;
- value = NULL;
- acl_access = 0;
- acl_default = 0;
- while (!err && p < e) {
- acl_access |= !strncmp(p, XATTR_NAME_POSIX_ACL_ACCESS,
- sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1);
- acl_default |= !strncmp(p, XATTR_NAME_POSIX_ACL_DEFAULT,
- sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)
- - 1);
- err = au_do_cpup_xattr(h_dst, h_src, p, &value, ignore_flags,
- verbose);
- p += strlen(p) + 1;
- }
- AuTraceErr(err);
- ignore_flags |= au_xattr_out_of_list;
- if (!err && !acl_access) {
- err = au_do_cpup_xattr(h_dst, h_src,
- XATTR_NAME_POSIX_ACL_ACCESS, &value,
- ignore_flags, verbose);
- AuTraceErr(err);
- }
- if (!err && !acl_default) {
- err = au_do_cpup_xattr(h_dst, h_src,
- XATTR_NAME_POSIX_ACL_DEFAULT, &value,
- ignore_flags, verbose);
- AuTraceErr(err);
- }
-
- kfree(value);
-
-out_free:
- kfree(o);
-out:
- if (!unlocked)
- mutex_unlock(&h_isrc->i_mutex);
- AuTraceErr(err);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-enum {
- AU_XATTR_LIST,
- AU_XATTR_GET
-};
-
-struct au_lgxattr {
- int type;
- union {
- struct {
- char *list;
- size_t size;
- } list;
- struct {
- const char *name;
- void *value;
- size_t size;
- } get;
- } u;
-};
-
-static ssize_t au_lgxattr(struct dentry *dentry, struct au_lgxattr *arg)
-{
- ssize_t err;
- struct path h_path;
- struct super_block *sb;
-
- sb = dentry->d_sb;
- err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
- if (unlikely(err))
- goto out;
- err = au_h_path_getattr(dentry, /*force*/1, &h_path);
- if (unlikely(err))
- goto out_si;
- if (unlikely(!h_path.dentry))
- /* illegally overlapped or something */
- goto out_di; /* pretending success */
-
- /* always topmost entry only */
- switch (arg->type) {
- case AU_XATTR_LIST:
- err = vfs_listxattr(h_path.dentry,
- arg->u.list.list, arg->u.list.size);
- break;
- case AU_XATTR_GET:
- err = vfs_getxattr(h_path.dentry,
- arg->u.get.name, arg->u.get.value,
- arg->u.get.size);
- break;
- }
-
-out_di:
- di_read_unlock(dentry, AuLock_IR);
-out_si:
- si_read_unlock(sb);
-out:
- AuTraceErr(err);
- return err;
-}
-
-ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size)
-{
- struct au_lgxattr arg = {
- .type = AU_XATTR_LIST,
- .u.list = {
- .list = list,
- .size = size
- },
- };
-
- return au_lgxattr(dentry, &arg);
-}
-
-ssize_t aufs_getxattr(struct dentry *dentry, const char *name, void *value,
- size_t size)
-{
- struct au_lgxattr arg = {
- .type = AU_XATTR_GET,
- .u.get = {
- .name = name,
- .value = value,
- .size = size
- },
- };
-
- return au_lgxattr(dentry, &arg);
-}
-
-int aufs_setxattr(struct dentry *dentry, const char *name, const void *value,
- size_t size, int flags)
-{
- struct au_srxattr arg = {
- .type = AU_XATTR_SET,
- .u.set = {
- .name = name,
- .value = value,
- .size = size,
- .flags = flags
- },
- };
-
- return au_srxattr(dentry, &arg);
-}
-
-int aufs_removexattr(struct dentry *dentry, const char *name)
-{
- struct au_srxattr arg = {
- .type = AU_XATTR_REMOVE,
- .u.remove = {
- .name = name
- },
- };
-
- return au_srxattr(dentry, &arg);
-}
-
-/* ---------------------------------------------------------------------- */
-
-#if 0
-static size_t au_xattr_list(struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len, int type)
-{
- return aufs_listxattr(dentry, list, list_size);
-}
-
-static int au_xattr_get(struct dentry *dentry, const char *name, void *buffer,
- size_t size, int type)
-{
- return aufs_getxattr(dentry, name, buffer, size);
-}
-
-static int au_xattr_set(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags, int type)
-{
- return aufs_setxattr(dentry, name, value, size, flags);
-}
-
-static const struct xattr_handler au_xattr_handler = {
- /* no prefix, no flags */
- .list = au_xattr_list,
- .get = au_xattr_get,
- .set = au_xattr_set
- /* why no remove? */
-};
-
-static const struct xattr_handler *au_xattr_handlers[] = {
- &au_xattr_handler
-};
-
-void au_xattr_init(struct super_block *sb)
-{
- /* sb->s_xattr = au_xattr_handlers; */
-}
-#endif
diff --git a/fs/aufs/xino.c b/fs/aufs/xino.c
deleted file mode 100644
index 994258e3f..000000000
--- a/fs/aufs/xino.c
+++ /dev/null
@@ -1,1305 +0,0 @@
-/*
- * Copyright (C) 2005-2016 Junjiro R. Okajima
- */
-
-/*
- * external inode number translation table and bitmap
- */
-
-#include <linux/seq_file.h>
-#include <linux/statfs.h>
-#include "aufs.h"
-
-/* todo: unnecessary to support mmap_sem since kernel-space? */
-ssize_t xino_fread(vfs_readf_t func, struct file *file, void *kbuf, size_t size,
- loff_t *pos)
-{
- ssize_t err;
- mm_segment_t oldfs;
- union {
- void *k;
- char __user *u;
- } buf;
-
- buf.k = kbuf;
- oldfs = get_fs();
- set_fs(KERNEL_DS);
- do {
- /* todo: signal_pending? */
- err = func(file, buf.u, size, pos);
- } while (err == -EAGAIN || err == -EINTR);
- set_fs(oldfs);
-
-#if 0 /* reserved for future use */
- if (err > 0)
- fsnotify_access(file->f_path.dentry);
-#endif
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
- size_t size, loff_t *pos);
-
-static ssize_t do_xino_fwrite(vfs_writef_t func, struct file *file, void *kbuf,
- size_t size, loff_t *pos)
-{
- ssize_t err;
- mm_segment_t oldfs;
- union {
- void *k;
- const char __user *u;
- } buf;
- int i;
- const int prevent_endless = 10;
-
- i = 0;
- buf.k = kbuf;
- oldfs = get_fs();
- set_fs(KERNEL_DS);
- do {
- err = func(file, buf.u, size, pos);
- if (err == -EINTR
- && !au_wkq_test()
- && fatal_signal_pending(current)) {
- set_fs(oldfs);
- err = xino_fwrite_wkq(func, file, kbuf, size, pos);
- BUG_ON(err == -EINTR);
- oldfs = get_fs();
- set_fs(KERNEL_DS);
- }
- } while (i++ < prevent_endless
- && (err == -EAGAIN || err == -EINTR));
- set_fs(oldfs);
-
-#if 0 /* reserved for future use */
- if (err > 0)
- fsnotify_modify(file->f_path.dentry);
-#endif
-
- return err;
-}
-
-struct do_xino_fwrite_args {
- ssize_t *errp;
- vfs_writef_t func;
- struct file *file;
- void *buf;
- size_t size;
- loff_t *pos;
-};
-
-static void call_do_xino_fwrite(void *args)
-{
- struct do_xino_fwrite_args *a = args;
- *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
-}
-
-static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
- size_t size, loff_t *pos)
-{
- ssize_t err;
- int wkq_err;
- struct do_xino_fwrite_args args = {
- .errp = &err,
- .func = func,
- .file = file,
- .buf = buf,
- .size = size,
- .pos = pos
- };
-
- /*
- * it breaks RLIMIT_FSIZE and normal user's limit,
- * users should care about quota and real 'filesystem full.'
- */
- wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
- if (unlikely(wkq_err))
- err = wkq_err;
-
- return err;
-}
-
-ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
- size_t size, loff_t *pos)
-{
- ssize_t err;
-
- if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) {
- lockdep_off();
- err = do_xino_fwrite(func, file, buf, size, pos);
- lockdep_on();
- } else
- err = xino_fwrite_wkq(func, file, buf, size, pos);
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * create a new xinofile at the same place/path as @base_file.
- */
-struct file *au_xino_create2(struct file *base_file, struct file *copy_src)
-{
- struct file *file;
- struct dentry *base, *parent;
- struct inode *dir, *delegated;
- struct qstr *name;
- struct path path;
- int err;
-
- base = base_file->f_path.dentry;
- parent = base->d_parent; /* dir inode is locked */
- dir = d_inode(parent);
- IMustLock(dir);
-
- file = ERR_PTR(-EINVAL);
- name = &base->d_name;
- path.dentry = vfsub_lookup_one_len(name->name, parent, name->len);
- if (IS_ERR(path.dentry)) {
- file = (void *)path.dentry;
- pr_err("%pd lookup err %ld\n",
- base, PTR_ERR(path.dentry));
- goto out;
- }
-
- /* no need to mnt_want_write() since we call dentry_open() later */
- err = vfs_create(dir, path.dentry, S_IRUGO | S_IWUGO, NULL);
- if (unlikely(err)) {
- file = ERR_PTR(err);
- pr_err("%pd create err %d\n", base, err);
- goto out_dput;
- }
-
- path.mnt = base_file->f_path.mnt;
- file = vfsub_dentry_open(&path,
- O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
- /* | __FMODE_NONOTIFY */);
- if (IS_ERR(file)) {
- pr_err("%pd open err %ld\n", base, PTR_ERR(file));
- goto out_dput;
- }
-
- delegated = NULL;
- err = vfsub_unlink(dir, &file->f_path, &delegated, /*force*/0);
- if (unlikely(err == -EWOULDBLOCK)) {
- pr_warn("cannot retry for NFSv4 delegation"
- " for an internal unlink\n");
- iput(delegated);
- }
- if (unlikely(err)) {
- pr_err("%pd unlink err %d\n", base, err);
- goto out_fput;
- }
-
- if (copy_src) {
- /* no one can touch copy_src xino */
- err = au_copy_file(file, copy_src, vfsub_f_size_read(copy_src));
- if (unlikely(err)) {
- pr_err("%pd copy err %d\n", base, err);
- goto out_fput;
- }
- }
- goto out_dput; /* success */
-
-out_fput:
- fput(file);
- file = ERR_PTR(err);
-out_dput:
- dput(path.dentry);
-out:
- return file;
-}
-
-struct au_xino_lock_dir {
- struct au_hinode *hdir;
- struct dentry *parent;
- struct mutex *mtx;
-};
-
-static void au_xino_lock_dir(struct super_block *sb, struct file *xino,
- struct au_xino_lock_dir *ldir)
-{
- aufs_bindex_t brid, bindex;
-
- ldir->hdir = NULL;
- bindex = -1;
- brid = au_xino_brid(sb);
- if (brid >= 0)
- bindex = au_br_index(sb, brid);
- if (bindex >= 0) {
- ldir->hdir = au_hi(d_inode(sb->s_root), bindex);
- au_hn_imtx_lock_nested(ldir->hdir, AuLsc_I_PARENT);
- } else {
- ldir->parent = dget_parent(xino->f_path.dentry);
- ldir->mtx = &d_inode(ldir->parent)->i_mutex;
- mutex_lock_nested(ldir->mtx, AuLsc_I_PARENT);
- }
-}
-
-static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
-{
- if (ldir->hdir)
- au_hn_imtx_unlock(ldir->hdir);
- else {
- mutex_unlock(ldir->mtx);
- dput(ldir->parent);
- }
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* trucate xino files asynchronously */
-
-int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
-{
- int err;
- unsigned long jiffy;
- blkcnt_t blocks;
- aufs_bindex_t bi, bend;
- struct kstatfs *st;
- struct au_branch *br;
- struct file *new_xino, *file;
- struct super_block *h_sb;
- struct au_xino_lock_dir ldir;
-
- err = -ENOMEM;
- st = kmalloc(sizeof(*st), GFP_NOFS);
- if (unlikely(!st))
- goto out;
-
- err = -EINVAL;
- bend = au_sbend(sb);
- if (unlikely(bindex < 0 || bend < bindex))
- goto out_st;
- br = au_sbr(sb, bindex);
- file = br->br_xino.xi_file;
- if (!file)
- goto out_st;
-
- err = vfs_statfs(&file->f_path, st);
- if (unlikely(err))
- AuErr1("statfs err %d, ignored\n", err);
- jiffy = jiffies;
- blocks = file_inode(file)->i_blocks;
- pr_info("begin truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
- bindex, (u64)blocks, st->f_bfree, st->f_blocks);
-
- au_xino_lock_dir(sb, file, &ldir);
- /* mnt_want_write() is unnecessary here */
- new_xino = au_xino_create2(file, file);
- au_xino_unlock_dir(&ldir);
- err = PTR_ERR(new_xino);
- if (IS_ERR(new_xino)) {
- pr_err("err %d, ignored\n", err);
- goto out_st;
- }
- err = 0;
- fput(file);
- br->br_xino.xi_file = new_xino;
-
- h_sb = au_br_sb(br);
- for (bi = 0; bi <= bend; bi++) {
- if (unlikely(bi == bindex))
- continue;
- br = au_sbr(sb, bi);
- if (au_br_sb(br) != h_sb)
- continue;
-
- fput(br->br_xino.xi_file);
- br->br_xino.xi_file = new_xino;
- get_file(new_xino);
- }
-
- err = vfs_statfs(&new_xino->f_path, st);
- if (!err) {
- pr_info("end truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
- bindex, (u64)file_inode(new_xino)->i_blocks,
- st->f_bfree, st->f_blocks);
- if (file_inode(new_xino)->i_blocks < blocks)
- au_sbi(sb)->si_xino_jiffy = jiffy;
- } else
- AuErr1("statfs err %d, ignored\n", err);
-
-out_st:
- kfree(st);
-out:
- return err;
-}
-
-struct xino_do_trunc_args {
- struct super_block *sb;
- struct au_branch *br;
-};
-
-static void xino_do_trunc(void *_args)
-{
- struct xino_do_trunc_args *args = _args;
- struct super_block *sb;
- struct au_branch *br;
- struct inode *dir;
- int err;
- aufs_bindex_t bindex;
-
- err = 0;
- sb = args->sb;
- dir = d_inode(sb->s_root);
- br = args->br;
-
- si_noflush_write_lock(sb);
- ii_read_lock_parent(dir);
- bindex = au_br_index(sb, br->br_id);
- err = au_xino_trunc(sb, bindex);
- ii_read_unlock(dir);
- if (unlikely(err))
- pr_warn("err b%d, (%d)\n", bindex, err);
- atomic_dec(&br->br_xino_running);
- atomic_dec(&br->br_count);
- si_write_unlock(sb);
- au_nwt_done(&au_sbi(sb)->si_nowait);
- kfree(args);
-}
-
-static int xino_trunc_test(struct super_block *sb, struct au_branch *br)
-{
- int err;
- struct kstatfs st;
- struct au_sbinfo *sbinfo;
-
- /* todo: si_xino_expire and the ratio should be customizable */
- sbinfo = au_sbi(sb);
- if (time_before(jiffies,
- sbinfo->si_xino_jiffy + sbinfo->si_xino_expire))
- return 0;
-
- /* truncation border */
- err = vfs_statfs(&br->br_xino.xi_file->f_path, &st);
- if (unlikely(err)) {
- AuErr1("statfs err %d, ignored\n", err);
- return 0;
- }
- if (div64_u64(st.f_bfree * 100, st.f_blocks) >= AUFS_XINO_DEF_TRUNC)
- return 0;
-
- return 1;
-}
-
-static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
-{
- struct xino_do_trunc_args *args;
- int wkq_err;
-
- if (!xino_trunc_test(sb, br))
- return;
-
- if (atomic_inc_return(&br->br_xino_running) > 1)
- goto out;
-
- /* lock and kfree() will be called in trunc_xino() */
- args = kmalloc(sizeof(*args), GFP_NOFS);
- if (unlikely(!args)) {
- AuErr1("no memory\n");
- goto out_args;
- }
-
- atomic_inc(&br->br_count);
- args->sb = sb;
- args->br = br;
- wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
- if (!wkq_err)
- return; /* success */
-
- pr_err("wkq %d\n", wkq_err);
- atomic_dec(&br->br_count);
-
-out_args:
- kfree(args);
-out:
- atomic_dec(&br->br_xino_running);
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int au_xino_do_write(vfs_writef_t write, struct file *file,
- ino_t h_ino, ino_t ino)
-{
- loff_t pos;
- ssize_t sz;
-
- pos = h_ino;
- if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) {
- AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
- return -EFBIG;
- }
- pos *= sizeof(ino);
- sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos);
- if (sz == sizeof(ino))
- return 0; /* success */
-
- AuIOErr("write failed (%zd)\n", sz);
- return -EIO;
-}
-
-/*
- * write @ino to the xinofile for the specified branch{@sb, @bindex}
- * at the position of @h_ino.
- * even if @ino is zero, it is written to the xinofile and means no entry.
- * if the size of the xino file on a specific filesystem exceeds the watermark,
- * try truncating it.
- */
-int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
- ino_t ino)
-{
- int err;
- unsigned int mnt_flags;
- struct au_branch *br;
-
- BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max)
- || ((loff_t)-1) > 0);
- SiMustAnyLock(sb);
-
- mnt_flags = au_mntflags(sb);
- if (!au_opt_test(mnt_flags, XINO))
- return 0;
-
- br = au_sbr(sb, bindex);
- err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
- h_ino, ino);
- if (!err) {
- if (au_opt_test(mnt_flags, TRUNC_XINO)
- && au_test_fs_trunc_xino(au_br_sb(br)))
- xino_try_trunc(sb, br);
- return 0; /* success */
- }
-
- AuIOErr("write failed (%d)\n", err);
- return -EIO;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* aufs inode number bitmap */
-
-static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
-static ino_t xib_calc_ino(unsigned long pindex, int bit)
-{
- ino_t ino;
-
- AuDebugOn(bit < 0 || page_bits <= bit);
- ino = AUFS_FIRST_INO + pindex * page_bits + bit;
- return ino;
-}
-
-static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
-{
- AuDebugOn(ino < AUFS_FIRST_INO);
- ino -= AUFS_FIRST_INO;
- *pindex = ino / page_bits;
- *bit = ino % page_bits;
-}
-
-static int xib_pindex(struct super_block *sb, unsigned long pindex)
-{
- int err;
- loff_t pos;
- ssize_t sz;
- struct au_sbinfo *sbinfo;
- struct file *xib;
- unsigned long *p;
-
- sbinfo = au_sbi(sb);
- MtxMustLock(&sbinfo->si_xib_mtx);
- AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
- || !au_opt_test(sbinfo->si_mntflags, XINO));
-
- if (pindex == sbinfo->si_xib_last_pindex)
- return 0;
-
- xib = sbinfo->si_xib;
- p = sbinfo->si_xib_buf;
- pos = sbinfo->si_xib_last_pindex;
- pos *= PAGE_SIZE;
- sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
- if (unlikely(sz != PAGE_SIZE))
- goto out;
-
- pos = pindex;
- pos *= PAGE_SIZE;
- if (vfsub_f_size_read(xib) >= pos + PAGE_SIZE)
- sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
- else {
- memset(p, 0, PAGE_SIZE);
- sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
- }
- if (sz == PAGE_SIZE) {
- sbinfo->si_xib_last_pindex = pindex;
- return 0; /* success */
- }
-
-out:
- AuIOErr1("write failed (%zd)\n", sz);
- err = sz;
- if (sz >= 0)
- err = -EIO;
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-static void au_xib_clear_bit(struct inode *inode)
-{
- int err, bit;
- unsigned long pindex;
- struct super_block *sb;
- struct au_sbinfo *sbinfo;
-
- AuDebugOn(inode->i_nlink);
-
- sb = inode->i_sb;
- xib_calc_bit(inode->i_ino, &pindex, &bit);
- AuDebugOn(page_bits <= bit);
- sbinfo = au_sbi(sb);
- mutex_lock(&sbinfo->si_xib_mtx);
- err = xib_pindex(sb, pindex);
- if (!err) {
- clear_bit(bit, sbinfo->si_xib_buf);
- sbinfo->si_xib_next_bit = bit;
- }
- mutex_unlock(&sbinfo->si_xib_mtx);
-}
-
-/* for s_op->delete_inode() */
-void au_xino_delete_inode(struct inode *inode, const int unlinked)
-{
- int err;
- unsigned int mnt_flags;
- aufs_bindex_t bindex, bend, bi;
- unsigned char try_trunc;
- struct au_iinfo *iinfo;
- struct super_block *sb;
- struct au_hinode *hi;
- struct inode *h_inode;
- struct au_branch *br;
- vfs_writef_t xwrite;
-
- sb = inode->i_sb;
- mnt_flags = au_mntflags(sb);
- if (!au_opt_test(mnt_flags, XINO)
- || inode->i_ino == AUFS_ROOT_INO)
- return;
-
- if (unlinked) {
- au_xigen_inc(inode);
- au_xib_clear_bit(inode);
- }
-
- iinfo = au_ii(inode);
- if (!iinfo)
- return;
-
- bindex = iinfo->ii_bstart;
- if (bindex < 0)
- return;
-
- xwrite = au_sbi(sb)->si_xwrite;
- try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO);
- hi = iinfo->ii_hinode + bindex;
- bend = iinfo->ii_bend;
- for (; bindex <= bend; bindex++, hi++) {
- h_inode = hi->hi_inode;
- if (!h_inode
- || (!unlinked && h_inode->i_nlink))
- continue;
-
- /* inode may not be revalidated */
- bi = au_br_index(sb, hi->hi_id);
- if (bi < 0)
- continue;
-
- br = au_sbr(sb, bi);
- err = au_xino_do_write(xwrite, br->br_xino.xi_file,
- h_inode->i_ino, /*ino*/0);
- if (!err && try_trunc
- && au_test_fs_trunc_xino(au_br_sb(br)))
- xino_try_trunc(sb, br);
- }
-}
-
-/* get an unused inode number from bitmap */
-ino_t au_xino_new_ino(struct super_block *sb)
-{
- ino_t ino;
- unsigned long *p, pindex, ul, pend;
- struct au_sbinfo *sbinfo;
- struct file *file;
- int free_bit, err;
-
- if (!au_opt_test(au_mntflags(sb), XINO))
- return iunique(sb, AUFS_FIRST_INO);
-
- sbinfo = au_sbi(sb);
- mutex_lock(&sbinfo->si_xib_mtx);
- p = sbinfo->si_xib_buf;
- free_bit = sbinfo->si_xib_next_bit;
- if (free_bit < page_bits && !test_bit(free_bit, p))
- goto out; /* success */
- free_bit = find_first_zero_bit(p, page_bits);
- if (free_bit < page_bits)
- goto out; /* success */
-
- pindex = sbinfo->si_xib_last_pindex;
- for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
- err = xib_pindex(sb, ul);
- if (unlikely(err))
- goto out_err;
- free_bit = find_first_zero_bit(p, page_bits);
- if (free_bit < page_bits)
- goto out; /* success */
- }
-
- file = sbinfo->si_xib;
- pend = vfsub_f_size_read(file) / PAGE_SIZE;
- for (ul = pindex + 1; ul <= pend; ul++) {
- err = xib_pindex(sb, ul);
- if (unlikely(err))
- goto out_err;
- free_bit = find_first_zero_bit(p, page_bits);
- if (free_bit < page_bits)
- goto out; /* success */
- }
- BUG();
-
-out:
- set_bit(free_bit, p);
- sbinfo->si_xib_next_bit = free_bit + 1;
- pindex = sbinfo->si_xib_last_pindex;
- mutex_unlock(&sbinfo->si_xib_mtx);
- ino = xib_calc_ino(pindex, free_bit);
- AuDbg("i%lu\n", (unsigned long)ino);
- return ino;
-out_err:
- mutex_unlock(&sbinfo->si_xib_mtx);
- AuDbg("i0\n");
- return 0;
-}
-
-/*
- * read @ino from xinofile for the specified branch{@sb, @bindex}
- * at the position of @h_ino.
- * if @ino does not exist and @do_new is true, get new one.
- */
-int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
- ino_t *ino)
-{
- int err;
- ssize_t sz;
- loff_t pos;
- struct file *file;
- struct au_sbinfo *sbinfo;
-
- *ino = 0;
- if (!au_opt_test(au_mntflags(sb), XINO))
- return 0; /* no xino */
-
- err = 0;
- sbinfo = au_sbi(sb);
- pos = h_ino;
- if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) {
- AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
- return -EFBIG;
- }
- pos *= sizeof(*ino);
-
- file = au_sbr(sb, bindex)->br_xino.xi_file;
- if (vfsub_f_size_read(file) < pos + sizeof(*ino))
- return 0; /* no ino */
-
- sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos);
- if (sz == sizeof(*ino))
- return 0; /* success */
-
- err = sz;
- if (unlikely(sz >= 0)) {
- err = -EIO;
- AuIOErr("xino read error (%zd)\n", sz);
- }
-
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* create and set a new xino file */
-
-struct file *au_xino_create(struct super_block *sb, char *fname, int silent)
-{
- struct file *file;
- struct dentry *h_parent, *d;
- struct inode *h_dir, *inode;
- int err;
-
- /*
- * at mount-time, and the xino file is the default path,
- * hnotify is disabled so we have no notify events to ignore.
- * when a user specified the xino, we cannot get au_hdir to be ignored.
- */
- file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
- /* | __FMODE_NONOTIFY */,
- S_IRUGO | S_IWUGO);
- if (IS_ERR(file)) {
- if (!silent)
- pr_err("open %s(%ld)\n", fname, PTR_ERR(file));
- return file;
- }
-
- /* keep file count */
- err = 0;
- inode = file_inode(file);
- h_parent = dget_parent(file->f_path.dentry);
- h_dir = d_inode(h_parent);
- mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
- /* mnt_want_write() is unnecessary here */
- /* no delegation since it is just created */
- if (inode->i_nlink)
- err = vfsub_unlink(h_dir, &file->f_path, /*delegated*/NULL,
- /*force*/0);
- mutex_unlock(&h_dir->i_mutex);
- dput(h_parent);
- if (unlikely(err)) {
- if (!silent)
- pr_err("unlink %s(%d)\n", fname, err);
- goto out;
- }
-
- err = -EINVAL;
- d = file->f_path.dentry;
- if (unlikely(sb == d->d_sb)) {
- if (!silent)
- pr_err("%s must be outside\n", fname);
- goto out;
- }
- if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
- if (!silent)
- pr_err("xino doesn't support %s(%s)\n",
- fname, au_sbtype(d->d_sb));
- goto out;
- }
- return file; /* success */
-
-out:
- fput(file);
- file = ERR_PTR(err);
- return file;
-}
-
-/*
- * find another branch who is on the same filesystem of the specified
- * branch{@btgt}. search until @bend.
- */
-static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
- aufs_bindex_t bend)
-{
- aufs_bindex_t bindex;
- struct super_block *tgt_sb = au_sbr_sb(sb, btgt);
-
- for (bindex = 0; bindex < btgt; bindex++)
- if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
- return bindex;
- for (bindex++; bindex <= bend; bindex++)
- if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
- return bindex;
- return -1;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * initialize the xinofile for the specified branch @br
- * at the place/path where @base_file indicates.
- * test whether another branch is on the same filesystem or not,
- * if @do_test is true.
- */
-int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
- struct file *base_file, int do_test)
-{
- int err;
- ino_t ino;
- aufs_bindex_t bend, bindex;
- struct au_branch *shared_br, *b;
- struct file *file;
- struct super_block *tgt_sb;
-
- shared_br = NULL;
- bend = au_sbend(sb);
- if (do_test) {
- tgt_sb = au_br_sb(br);
- for (bindex = 0; bindex <= bend; bindex++) {
- b = au_sbr(sb, bindex);
- if (tgt_sb == au_br_sb(b)) {
- shared_br = b;
- break;
- }
- }
- }
-
- if (!shared_br || !shared_br->br_xino.xi_file) {
- struct au_xino_lock_dir ldir;
-
- au_xino_lock_dir(sb, base_file, &ldir);
- /* mnt_want_write() is unnecessary here */
- file = au_xino_create2(base_file, NULL);
- au_xino_unlock_dir(&ldir);
- err = PTR_ERR(file);
- if (IS_ERR(file))
- goto out;
- br->br_xino.xi_file = file;
- } else {
- br->br_xino.xi_file = shared_br->br_xino.xi_file;
- get_file(br->br_xino.xi_file);
- }
-
- ino = AUFS_ROOT_INO;
- err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
- h_ino, ino);
- if (unlikely(err)) {
- fput(br->br_xino.xi_file);
- br->br_xino.xi_file = NULL;
- }
-
-out:
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/* trucate a xino bitmap file */
-
-/* todo: slow */
-static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
-{
- int err, bit;
- ssize_t sz;
- unsigned long pindex;
- loff_t pos, pend;
- struct au_sbinfo *sbinfo;
- vfs_readf_t func;
- ino_t *ino;
- unsigned long *p;
-
- err = 0;
- sbinfo = au_sbi(sb);
- MtxMustLock(&sbinfo->si_xib_mtx);
- p = sbinfo->si_xib_buf;
- func = sbinfo->si_xread;
- pend = vfsub_f_size_read(file);
- pos = 0;
- while (pos < pend) {
- sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
- err = sz;
- if (unlikely(sz <= 0))
- goto out;
-
- err = 0;
- for (ino = page; sz > 0; ino++, sz -= sizeof(ino)) {
- if (unlikely(*ino < AUFS_FIRST_INO))
- continue;
-
- xib_calc_bit(*ino, &pindex, &bit);
- AuDebugOn(page_bits <= bit);
- err = xib_pindex(sb, pindex);
- if (!err)
- set_bit(bit, p);
- else
- goto out;
- }
- }
-
-out:
- return err;
-}
-
-static int xib_restore(struct super_block *sb)
-{
- int err;
- aufs_bindex_t bindex, bend;
- void *page;
-
- err = -ENOMEM;
- page = (void *)__get_free_page(GFP_NOFS);
- if (unlikely(!page))
- goto out;
-
- err = 0;
- bend = au_sbend(sb);
- for (bindex = 0; !err && bindex <= bend; bindex++)
- if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0)
- err = do_xib_restore
- (sb, au_sbr(sb, bindex)->br_xino.xi_file, page);
- else
- AuDbg("b%d\n", bindex);
- free_page((unsigned long)page);
-
-out:
- return err;
-}
-
-int au_xib_trunc(struct super_block *sb)
-{
- int err;
- ssize_t sz;
- loff_t pos;
- struct au_xino_lock_dir ldir;
- struct au_sbinfo *sbinfo;
- unsigned long *p;
- struct file *file;
-
- SiMustWriteLock(sb);
-
- err = 0;
- sbinfo = au_sbi(sb);
- if (!au_opt_test(sbinfo->si_mntflags, XINO))
- goto out;
-
- file = sbinfo->si_xib;
- if (vfsub_f_size_read(file) <= PAGE_SIZE)
- goto out;
-
- au_xino_lock_dir(sb, file, &ldir);
- /* mnt_want_write() is unnecessary here */
- file = au_xino_create2(sbinfo->si_xib, NULL);
- au_xino_unlock_dir(&ldir);
- err = PTR_ERR(file);
- if (IS_ERR(file))
- goto out;
- fput(sbinfo->si_xib);
- sbinfo->si_xib = file;
-
- p = sbinfo->si_xib_buf;
- memset(p, 0, PAGE_SIZE);
- pos = 0;
- sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos);
- if (unlikely(sz != PAGE_SIZE)) {
- err = sz;
- AuIOErr("err %d\n", err);
- if (sz >= 0)
- err = -EIO;
- goto out;
- }
-
- mutex_lock(&sbinfo->si_xib_mtx);
- /* mnt_want_write() is unnecessary here */
- err = xib_restore(sb);
- mutex_unlock(&sbinfo->si_xib_mtx);
-
-out:
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * xino mount option handlers
- */
-
-/* xino bitmap */
-static void xino_clear_xib(struct super_block *sb)
-{
- struct au_sbinfo *sbinfo;
-
- SiMustWriteLock(sb);
-
- sbinfo = au_sbi(sb);
- sbinfo->si_xread = NULL;
- sbinfo->si_xwrite = NULL;
- if (sbinfo->si_xib)
- fput(sbinfo->si_xib);
- sbinfo->si_xib = NULL;
- free_page((unsigned long)sbinfo->si_xib_buf);
- sbinfo->si_xib_buf = NULL;
-}
-
-static int au_xino_set_xib(struct super_block *sb, struct file *base)
-{
- int err;
- loff_t pos;
- struct au_sbinfo *sbinfo;
- struct file *file;
-
- SiMustWriteLock(sb);
-
- sbinfo = au_sbi(sb);
- file = au_xino_create2(base, sbinfo->si_xib);
- err = PTR_ERR(file);
- if (IS_ERR(file))
- goto out;
- if (sbinfo->si_xib)
- fput(sbinfo->si_xib);
- sbinfo->si_xib = file;
- sbinfo->si_xread = vfs_readf(file);
- sbinfo->si_xwrite = vfs_writef(file);
-
- err = -ENOMEM;
- if (!sbinfo->si_xib_buf)
- sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
- if (unlikely(!sbinfo->si_xib_buf))
- goto out_unset;
-
- sbinfo->si_xib_last_pindex = 0;
- sbinfo->si_xib_next_bit = 0;
- if (vfsub_f_size_read(file) < PAGE_SIZE) {
- pos = 0;
- err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf,
- PAGE_SIZE, &pos);
- if (unlikely(err != PAGE_SIZE))
- goto out_free;
- }
- err = 0;
- goto out; /* success */
-
-out_free:
- free_page((unsigned long)sbinfo->si_xib_buf);
- sbinfo->si_xib_buf = NULL;
- if (err >= 0)
- err = -EIO;
-out_unset:
- fput(sbinfo->si_xib);
- sbinfo->si_xib = NULL;
- sbinfo->si_xread = NULL;
- sbinfo->si_xwrite = NULL;
-out:
- return err;
-}
-
-/* xino for each branch */
-static void xino_clear_br(struct super_block *sb)
-{
- aufs_bindex_t bindex, bend;
- struct au_branch *br;
-
- bend = au_sbend(sb);
- for (bindex = 0; bindex <= bend; bindex++) {
- br = au_sbr(sb, bindex);
- if (!br || !br->br_xino.xi_file)
- continue;
-
- fput(br->br_xino.xi_file);
- br->br_xino.xi_file = NULL;
- }
-}
-
-static int au_xino_set_br(struct super_block *sb, struct file *base)
-{
- int err;
- ino_t ino;
- aufs_bindex_t bindex, bend, bshared;
- struct {
- struct file *old, *new;
- } *fpair, *p;
- struct au_branch *br;
- struct inode *inode;
- vfs_writef_t writef;
-
- SiMustWriteLock(sb);
-
- err = -ENOMEM;
- bend = au_sbend(sb);
- fpair = kcalloc(bend + 1, sizeof(*fpair), GFP_NOFS);
- if (unlikely(!fpair))
- goto out;
-
- inode = d_inode(sb->s_root);
- ino = AUFS_ROOT_INO;
- writef = au_sbi(sb)->si_xwrite;
- for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
- br = au_sbr(sb, bindex);
- bshared = is_sb_shared(sb, bindex, bindex - 1);
- if (bshared >= 0) {
- /* shared xino */
- *p = fpair[bshared];
- get_file(p->new);
- }
-
- if (!p->new) {
- /* new xino */
- p->old = br->br_xino.xi_file;
- p->new = au_xino_create2(base, br->br_xino.xi_file);
- err = PTR_ERR(p->new);
- if (IS_ERR(p->new)) {
- p->new = NULL;
- goto out_pair;
- }
- }
-
- err = au_xino_do_write(writef, p->new,
- au_h_iptr(inode, bindex)->i_ino, ino);
- if (unlikely(err))
- goto out_pair;
- }
-
- for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
- br = au_sbr(sb, bindex);
- if (br->br_xino.xi_file)
- fput(br->br_xino.xi_file);
- get_file(p->new);
- br->br_xino.xi_file = p->new;
- }
-
-out_pair:
- for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++)
- if (p->new)
- fput(p->new);
- else
- break;
- kfree(fpair);
-out:
- return err;
-}
-
-void au_xino_clr(struct super_block *sb)
-{
- struct au_sbinfo *sbinfo;
-
- au_xigen_clr(sb);
- xino_clear_xib(sb);
- xino_clear_br(sb);
- sbinfo = au_sbi(sb);
- /* lvalue, do not call au_mntflags() */
- au_opt_clr(sbinfo->si_mntflags, XINO);
-}
-
-int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount)
-{
- int err, skip;
- struct dentry *parent, *cur_parent;
- struct qstr *dname, *cur_name;
- struct file *cur_xino;
- struct inode *dir;
- struct au_sbinfo *sbinfo;
-
- SiMustWriteLock(sb);
-
- err = 0;
- sbinfo = au_sbi(sb);
- parent = dget_parent(xino->file->f_path.dentry);
- if (remount) {
- skip = 0;
- dname = &xino->file->f_path.dentry->d_name;
- cur_xino = sbinfo->si_xib;
- if (cur_xino) {
- cur_parent = dget_parent(cur_xino->f_path.dentry);
- cur_name = &cur_xino->f_path.dentry->d_name;
- skip = (cur_parent == parent
- && au_qstreq(dname, cur_name));
- dput(cur_parent);
- }
- if (skip)
- goto out;
- }
-
- au_opt_set(sbinfo->si_mntflags, XINO);
- dir = d_inode(parent);
- mutex_lock_nested(&dir->i_mutex, AuLsc_I_PARENT);
- /* mnt_want_write() is unnecessary here */
- err = au_xino_set_xib(sb, xino->file);
- if (!err)
- err = au_xigen_set(sb, xino->file);
- if (!err)
- err = au_xino_set_br(sb, xino->file);
- mutex_unlock(&dir->i_mutex);
- if (!err)
- goto out; /* success */
-
- /* reset all */
- AuIOErr("failed creating xino(%d).\n", err);
- au_xigen_clr(sb);
- xino_clear_xib(sb);
-
-out:
- dput(parent);
- return err;
-}
-
-/* ---------------------------------------------------------------------- */
-
-/*
- * create a xinofile at the default place/path.
- */
-struct file *au_xino_def(struct super_block *sb)
-{
- struct file *file;
- char *page, *p;
- struct au_branch *br;
- struct super_block *h_sb;
- struct path path;
- aufs_bindex_t bend, bindex, bwr;
-
- br = NULL;
- bend = au_sbend(sb);
- bwr = -1;
- for (bindex = 0; bindex <= bend; bindex++) {
- br = au_sbr(sb, bindex);
- if (au_br_writable(br->br_perm)
- && !au_test_fs_bad_xino(au_br_sb(br))) {
- bwr = bindex;
- break;
- }
- }
-
- if (bwr >= 0) {
- file = ERR_PTR(-ENOMEM);
- page = (void *)__get_free_page(GFP_NOFS);
- if (unlikely(!page))
- goto out;
- path.mnt = au_br_mnt(br);
- path.dentry = au_h_dptr(sb->s_root, bwr);
- p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME));
- file = (void *)p;
- if (!IS_ERR(p)) {
- strcat(p, "/" AUFS_XINO_FNAME);
- AuDbg("%s\n", p);
- file = au_xino_create(sb, p, /*silent*/0);
- if (!IS_ERR(file))
- au_xino_brid_set(sb, br->br_id);
- }
- free_page((unsigned long)page);
- } else {
- file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0);
- if (IS_ERR(file))
- goto out;
- h_sb = file->f_path.dentry->d_sb;
- if (unlikely(au_test_fs_bad_xino(h_sb))) {
- pr_err("xino doesn't support %s(%s)\n",
- AUFS_XINO_DEFPATH, au_sbtype(h_sb));
- fput(file);
- file = ERR_PTR(-EINVAL);
- }
- if (!IS_ERR(file))
- au_xino_brid_set(sb, -1);
- }
-
-out:
- return file;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int au_xino_path(struct seq_file *seq, struct file *file)
-{
- int err;
-
- err = au_seq_path(seq, &file->f_path);
- if (unlikely(err))
- goto out;
-
-#define Deleted "\\040(deleted)"
- seq->count -= sizeof(Deleted) - 1;
- AuDebugOn(memcmp(seq->buf + seq->count, Deleted,
- sizeof(Deleted) - 1));
-#undef Deleted
-
-out:
- return err;
-}
diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c
index da0c33481..84e037d1d 100644
--- a/fs/autofs4/symlink.c
+++ b/fs/autofs4/symlink.c
@@ -12,10 +12,16 @@
#include "autofs_i.h"
-static const char *autofs4_follow_link(struct dentry *dentry, void **cookie)
+static const char *autofs4_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
- struct autofs_info *ino = autofs4_dentry_ino(dentry);
+ struct autofs_sb_info *sbi;
+ struct autofs_info *ino;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+ sbi = autofs4_sbi(dentry->d_sb);
+ ino = autofs4_dentry_ino(dentry);
if (ino && !autofs4_oz_mode(sbi))
ino->last_used = jiffies;
return d_inode(dentry)->i_private;
@@ -23,5 +29,5 @@ static const char *autofs4_follow_link(struct dentry *dentry, void **cookie)
const struct inode_operations autofs4_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = autofs4_follow_link
+ .get_link = autofs4_get_link
};
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 861b1e1c4..103f5d7c3 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -192,7 +192,7 @@ EXPORT_SYMBOL(make_bad_inode);
* Returns true if the inode in question has been marked as bad.
*/
-int is_bad_inode(struct inode *inode)
+bool is_bad_inode(struct inode *inode)
{
return (inode->i_op == &bad_inode_ops);
}
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 46aedacfa..cc0e08252 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -42,7 +42,7 @@ static struct inode *befs_iget(struct super_block *, unsigned long);
static struct inode *befs_alloc_inode(struct super_block *sb);
static void befs_destroy_inode(struct inode *inode);
static void befs_destroy_inodecache(void);
-static const char *befs_follow_link(struct dentry *, void **);
+static int befs_symlink_readpage(struct file *, struct page *);
static int befs_utf2nls(struct super_block *sb, const char *in, int in_len,
char **out, int *out_len);
static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
@@ -79,10 +79,8 @@ static const struct address_space_operations befs_aops = {
.bmap = befs_bmap,
};
-static const struct inode_operations befs_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = befs_follow_link,
- .put_link = kfree_put_link,
+static const struct address_space_operations befs_symlink_aops = {
+ .readpage = befs_symlink_readpage,
};
/*
@@ -398,7 +396,9 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
inode->i_fop = &befs_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
- inode->i_op = &befs_symlink_inode_operations;
+ inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
+ inode->i_mapping->a_ops = &befs_symlink_aops;
} else {
inode->i_link = befs_ino->i_data.symlink;
inode->i_op = &simple_symlink_inode_operations;
@@ -434,7 +434,7 @@ befs_init_inodecache(void)
befs_inode_cachep = kmem_cache_create("befs_inode_cache",
sizeof (struct befs_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
if (befs_inode_cachep == NULL) {
pr_err("%s: Couldn't initialize inode slabcache\n", __func__);
@@ -463,31 +463,33 @@ befs_destroy_inodecache(void)
* The data stream become link name. Unless the LONG_SYMLINK
* flag is set.
*/
-static const char *
-befs_follow_link(struct dentry *dentry, void **cookie)
+static int befs_symlink_readpage(struct file *unused, struct page *page)
{
- struct super_block *sb = dentry->d_sb;
- struct befs_inode_info *befs_ino = BEFS_I(d_inode(dentry));
+ struct inode *inode = page->mapping->host;
+ struct super_block *sb = inode->i_sb;
+ struct befs_inode_info *befs_ino = BEFS_I(inode);
befs_data_stream *data = &befs_ino->i_data.ds;
befs_off_t len = data->size;
- char *link;
+ char *link = page_address(page);
- if (len == 0) {
+ if (len == 0 || len > PAGE_SIZE) {
befs_error(sb, "Long symlink with illegal length");
- return ERR_PTR(-EIO);
+ goto fail;
}
befs_debug(sb, "Follow long symlink");
- link = kmalloc(len, GFP_NOFS);
- if (!link)
- return ERR_PTR(-ENOMEM);
if (befs_read_lsymlink(sb, data, link, len) != len) {
- kfree(link);
befs_error(sb, "Failed to read entire long symlink");
- return ERR_PTR(-EIO);
+ goto fail;
}
link[len - 1] = '\0';
- return *cookie = link;
+ SetPageUptodate(page);
+ unlock_page(page);
+ return 0;
+fail:
+ SetPageError(page);
+ unlock_page(page);
+ return -EIO;
}
/*
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index fdcb4d69f..1e5c896f6 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -270,7 +270,7 @@ static int __init init_inodecache(void)
bfs_inode_cachep = kmem_cache_create("bfs_inode_cache",
sizeof(struct bfs_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
if (bfs_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 3a93755e8..7d914c67a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -491,6 +491,7 @@ static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
* arch_check_elf() - check an ELF executable
* @ehdr: The main ELF header
* @has_interp: True if the ELF has an interpreter, else false.
+ * @interp_ehdr: The interpreter's ELF header
* @state: Architecture-specific state preserved throughout the process
* of loading the ELF.
*
@@ -502,6 +503,7 @@ static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
* with that return code.
*/
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
+ struct elfhdr *interp_ehdr,
struct arch_elf_state *state)
{
/* Dummy implementation, always proceed */
@@ -651,7 +653,7 @@ static unsigned long randomize_stack_top(unsigned long stack_top)
if ((current->flags & PF_RANDOMIZE) &&
!(current->personality & ADDR_NO_RANDOMIZE)) {
- random_variable = (unsigned long) get_random_int();
+ random_variable = get_random_long();
random_variable &= STACK_RND_MASK;
random_variable <<= PAGE_SHIFT;
}
@@ -829,7 +831,9 @@ static int load_elf_binary(struct linux_binprm *bprm)
* still possible to return an error to the code that invoked
* the exec syscall.
*/
- retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
+ retval = arch_check_elf(&loc->elf_ex,
+ !!interpreter, &loc->interp_elf_ex,
+ &arch_state);
if (retval)
goto out_free_dentry;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 78f005f37..3a3ced779 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -638,11 +638,11 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
case 3:
/* Delete this handler. */
root = dget(file->f_path.dentry->d_sb->s_root);
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
kill_node(e);
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
dput(root);
break;
default:
@@ -675,7 +675,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
return PTR_ERR(e);
root = dget(sb->s_root);
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
dentry = lookup_one_len(e->name, root, strlen(e->name));
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
@@ -711,7 +711,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
out2:
dput(dentry);
out:
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
dput(root);
if (err) {
@@ -754,12 +754,12 @@ static ssize_t bm_status_write(struct file *file, const char __user *buffer,
case 3:
/* Delete all handlers. */
root = dget(file->f_path.dentry->d_sb->s_root);
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
while (!list_empty(&entries))
kill_node(list_entry(entries.next, Node, list));
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
dput(root);
break;
default:
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 44d4a1e92..826b164a4 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -75,7 +75,7 @@ void kill_bdev(struct block_device *bdev)
{
struct address_space *mapping = bdev->bd_inode->i_mapping;
- if (mapping->nrpages == 0 && mapping->nrshadows == 0)
+ if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
return;
invalidate_bh_lrus();
@@ -156,11 +156,16 @@ blkdev_get_block(struct inode *inode, sector_t iblock,
return 0;
}
+static struct inode *bdev_file_inode(struct file *file)
+{
+ return file->f_mapping->host;
+}
+
static ssize_t
blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
{
struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
+ struct inode *inode = bdev_file_inode(file);
if (IS_DAX(inode))
return dax_do_io(iocb, inode, iter, offset, blkdev_get_block,
@@ -338,18 +343,18 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping,
*/
static loff_t block_llseek(struct file *file, loff_t offset, int whence)
{
- struct inode *bd_inode = file->f_mapping->host;
+ struct inode *bd_inode = bdev_file_inode(file);
loff_t retval;
- mutex_lock(&bd_inode->i_mutex);
+ inode_lock(bd_inode);
retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
- mutex_unlock(&bd_inode->i_mutex);
+ inode_unlock(bd_inode);
return retval;
}
int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
- struct inode *bd_inode = filp->f_mapping->host;
+ struct inode *bd_inode = bdev_file_inode(filp);
struct block_device *bdev = I_BDEV(bd_inode);
int error;
@@ -395,7 +400,7 @@ int bdev_read_page(struct block_device *bdev, sector_t sector,
if (!ops->rw_page || bdev_get_integrity(bdev))
return result;
- result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL);
+ result = blk_queue_enter(bdev->bd_queue, false);
if (result)
return result;
result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
@@ -432,7 +437,7 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
if (!ops->rw_page || bdev_get_integrity(bdev))
return -EOPNOTSUPP;
- result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL);
+ result = blk_queue_enter(bdev->bd_queue, false);
if (result)
return result;
@@ -450,10 +455,7 @@ EXPORT_SYMBOL_GPL(bdev_write_page);
/**
* bdev_direct_access() - Get the address for directly-accessibly memory
* @bdev: The device containing the memory
- * @sector: The offset within the device
- * @addr: Where to put the address of the memory
- * @pfn: The Page Frame Number for the memory
- * @size: The number of bytes requested
+ * @dax: control and output parameters for ->direct_access
*
* If a block device is made up of directly addressable memory, this function
* will tell the caller the PFN and the address of the memory. The address
@@ -464,10 +466,10 @@ EXPORT_SYMBOL_GPL(bdev_write_page);
* Return: negative errno if an error occurs, otherwise the number of bytes
* accessible at this address.
*/
-long bdev_direct_access(struct block_device *bdev, sector_t sector,
- void __pmem **addr, unsigned long *pfn, long size)
+long bdev_direct_access(struct block_device *bdev, struct blk_dax_ctl *dax)
{
- long avail;
+ sector_t sector = dax->sector;
+ long avail, size = dax->size;
const struct block_device_operations *ops = bdev->bd_disk->fops;
/*
@@ -486,9 +488,11 @@ long bdev_direct_access(struct block_device *bdev, sector_t sector,
sector += get_start_sect(bdev);
if (sector % (PAGE_SIZE / 512))
return -EINVAL;
- avail = ops->direct_access(bdev, sector, addr, pfn);
+ avail = ops->direct_access(bdev, sector, &dax->addr, &dax->pfn);
if (!avail)
return -ERANGE;
+ if (avail > 0 && avail & ~PAGE_MASK)
+ return -ENXIO;
return min(avail, size);
}
EXPORT_SYMBOL_GPL(bdev_direct_access);
@@ -590,7 +594,7 @@ void __init bdev_cache_init(void)
bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_PANIC),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT|SLAB_PANIC),
init_once);
err = register_filesystem(&bd_type);
if (err)
@@ -696,7 +700,7 @@ static struct block_device *bd_acquire(struct inode *inode)
spin_lock(&bdev_lock);
bdev = inode->i_bdev;
if (bdev) {
- ihold(bdev->bd_inode);
+ bdgrab(bdev);
spin_unlock(&bdev_lock);
return bdev;
}
@@ -712,7 +716,7 @@ static struct block_device *bd_acquire(struct inode *inode)
* So, we can access it via ->i_mapping always
* without igrab().
*/
- ihold(bdev->bd_inode);
+ bdgrab(bdev);
inode->i_bdev = bdev;
inode->i_mapping = bdev->bd_inode->i_mapping;
list_add(&inode->i_devices, &bdev->bd_inodes);
@@ -735,7 +739,7 @@ void bd_forget(struct inode *inode)
spin_unlock(&bdev_lock);
if (bdev)
- iput(bdev->bd_inode);
+ bdput(bdev);
}
/**
@@ -1042,12 +1046,9 @@ EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
static void flush_disk(struct block_device *bdev, bool kill_dirty)
{
if (__invalidate_device(bdev, kill_dirty)) {
- char name[BDEVNAME_SIZE] = "";
-
- if (bdev->bd_disk)
- disk_name(bdev->bd_disk, 0, name);
printk(KERN_WARNING "VFS: busy inodes on changed media or "
- "resized disk %s\n", name);
+ "resized disk %s\n",
+ bdev->bd_disk ? bdev->bd_disk->disk_name : "");
}
if (!bdev->bd_disk)
@@ -1071,12 +1072,9 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
disk_size = (loff_t)get_capacity(disk) << 9;
bdev_size = i_size_read(bdev->bd_inode);
if (disk_size != bdev_size) {
- char name[BDEVNAME_SIZE];
-
- disk_name(disk, 0, name);
printk(KERN_INFO
"%s: detected capacity change from %lld to %lld\n",
- name, bdev_size, disk_size);
+ disk->disk_name, bdev_size, disk_size);
i_size_write(bdev->bd_inode, disk_size);
flush_disk(bdev, false);
}
@@ -1144,9 +1142,9 @@ void bd_set_size(struct block_device *bdev, loff_t size)
{
unsigned bsize = bdev_logical_block_size(bdev);
- mutex_lock(&bdev->bd_inode->i_mutex);
+ inode_lock(bdev->bd_inode);
i_size_write(bdev->bd_inode, size);
- mutex_unlock(&bdev->bd_inode->i_mutex);
+ inode_unlock(bdev->bd_inode);
while (bsize < PAGE_CACHE_SIZE) {
if (size & bsize)
break;
@@ -1203,7 +1201,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
bdev->bd_disk = disk;
bdev->bd_queue = disk->queue;
bdev->bd_contains = bdev;
- bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0;
+ if (IS_ENABLED(CONFIG_BLK_DEV_DAX) && disk->fops->direct_access)
+ bdev->bd_inode->i_flags = S_DAX;
+ else
+ bdev->bd_inode->i_flags = 0;
+
if (!partno) {
ret = -ENXIO;
bdev->bd_part = disk_get_part(disk, partno);
@@ -1230,8 +1232,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
}
}
- if (!ret)
+ if (!ret) {
bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
+ if (!blkdev_dax_capable(bdev))
+ bdev->bd_inode->i_flags &= ~S_DAX;
+ }
/*
* If the device is invalidated, rescan partition
@@ -1245,6 +1250,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
else if (ret == -ENOMEDIUM)
invalidate_partitions(disk, bdev);
}
+
if (ret)
goto out_clear;
} else {
@@ -1265,12 +1271,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
goto out_clear;
}
bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
- /*
- * If the partition is not aligned on a page
- * boundary, we can't do dax I/O to it.
- */
- if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) ||
- (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
+ if (!blkdev_dax_capable(bdev))
bdev->bd_inode->i_flags &= ~S_DAX;
}
} else {
@@ -1605,14 +1606,14 @@ EXPORT_SYMBOL(blkdev_put);
static int blkdev_close(struct inode * inode, struct file * filp)
{
- struct block_device *bdev = I_BDEV(filp->f_mapping->host);
+ struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
blkdev_put(bdev, filp->f_mode);
return 0;
}
static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
- struct block_device *bdev = I_BDEV(file->f_mapping->host);
+ struct block_device *bdev = I_BDEV(bdev_file_inode(file));
fmode_t mode = file->f_mode;
/*
@@ -1637,7 +1638,7 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
- struct inode *bd_inode = file->f_mapping->host;
+ struct inode *bd_inode = bdev_file_inode(file);
loff_t size = i_size_read(bd_inode);
struct blk_plug plug;
ssize_t ret;
@@ -1669,7 +1670,7 @@ EXPORT_SYMBOL_GPL(blkdev_write_iter);
ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
- struct inode *bd_inode = file->f_mapping->host;
+ struct inode *bd_inode = bdev_file_inode(file);
loff_t size = i_size_read(bd_inode);
loff_t pos = iocb->ki_pos;
@@ -1696,25 +1697,102 @@ static int blkdev_releasepage(struct page *page, gfp_t wait)
return try_to_free_buffers(page);
}
+static int blkdev_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ if (dax_mapping(mapping)) {
+ struct block_device *bdev = I_BDEV(mapping->host);
+
+ return dax_writeback_mapping_range(mapping, bdev, wbc);
+ }
+ return generic_writepages(mapping, wbc);
+}
+
static const struct address_space_operations def_blk_aops = {
.readpage = blkdev_readpage,
.readpages = blkdev_readpages,
.writepage = blkdev_writepage,
.write_begin = blkdev_write_begin,
.write_end = blkdev_write_end,
- .writepages = generic_writepages,
+ .writepages = blkdev_writepages,
.releasepage = blkdev_releasepage,
.direct_IO = blkdev_direct_IO,
.is_dirty_writeback = buffer_check_dirty_writeback,
};
+#ifdef CONFIG_FS_DAX
+/*
+ * In the raw block case we do not need to contend with truncation nor
+ * unwritten file extents. Without those concerns there is no need for
+ * additional locking beyond the mmap_sem context that these routines
+ * are already executing under.
+ *
+ * Note, there is no protection if the block device is dynamically
+ * resized (partition grow/shrink) during a fault. A stable block device
+ * size is already not enforced in the blkdev_direct_IO path.
+ *
+ * For DAX, it is the responsibility of the block device driver to
+ * ensure the whole-disk device size is stable while requests are in
+ * flight.
+ *
+ * Finally, unlike the filemap_page_mkwrite() case there is no
+ * filesystem superblock to sync against freezing. We still include a
+ * pfn_mkwrite callback for dax drivers to receive write fault
+ * notifications.
+ */
+static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ return __dax_fault(vma, vmf, blkdev_get_block, NULL);
+}
+
+static int blkdev_dax_pfn_mkwrite(struct vm_area_struct *vma,
+ struct vm_fault *vmf)
+{
+ return dax_pfn_mkwrite(vma, vmf);
+}
+
+static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd, unsigned int flags)
+{
+ return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
+}
+
+static const struct vm_operations_struct blkdev_dax_vm_ops = {
+ .fault = blkdev_dax_fault,
+ .pmd_fault = blkdev_dax_pmd_fault,
+ .pfn_mkwrite = blkdev_dax_pfn_mkwrite,
+};
+
+static const struct vm_operations_struct blkdev_default_vm_ops = {
+ .fault = filemap_fault,
+ .map_pages = filemap_map_pages,
+};
+
+static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct inode *bd_inode = bdev_file_inode(file);
+
+ file_accessed(file);
+ if (IS_DAX(bd_inode)) {
+ vma->vm_ops = &blkdev_dax_vm_ops;
+ vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
+ } else {
+ vma->vm_ops = &blkdev_default_vm_ops;
+ }
+
+ return 0;
+}
+#else
+#define blkdev_mmap generic_file_mmap
+#endif
+
const struct file_operations def_blk_fops = {
.open = blkdev_open,
.release = blkdev_close,
.llseek = block_llseek,
.read_iter = blkdev_read_iter,
.write_iter = blkdev_write_iter,
- .mmap = generic_file_mmap,
+ .mmap = blkdev_mmap,
.fsync = blkdev_fsync,
.unlocked_ioctl = block_ioctl,
#ifdef CONFIG_COMPAT
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 6d1d0b93b..128ce17a8 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -9,11 +9,12 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
- uuid-tree.o props.o hash.o
+ uuid-tree.o props.o hash.o free-space-tree.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
tests/extent-buffer-tests.o tests/btrfs-tests.o \
- tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o
+ tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o \
+ tests/free-space-tree-tests.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 9a0124a95..6d263bb16 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -37,10 +37,10 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
switch (type) {
case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
- name = POSIX_ACL_XATTR_DEFAULT;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
BUG();
@@ -48,7 +48,7 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
size = __btrfs_getxattr(inode, name, "", 0);
if (size > 0) {
- value = kzalloc(size, GFP_NOFS);
+ value = kzalloc(size, GFP_KERNEL);
if (!value)
return ERR_PTR(-ENOMEM);
size = __btrfs_getxattr(inode, name, value, size);
@@ -81,7 +81,7 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
switch (type) {
case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
ret = posix_acl_equiv_mode(acl, &inode->i_mode);
if (ret < 0)
@@ -94,7 +94,7 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
case ACL_TYPE_DEFAULT:
if (!S_ISDIR(inode->i_mode))
return acl ? -EINVAL : 0;
- name = POSIX_ACL_XATTR_DEFAULT;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
return -EINVAL;
@@ -102,7 +102,7 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
if (acl) {
size = posix_acl_xattr_size(acl->a_count);
- value = kmalloc(size, GFP_NOFS);
+ value = kmalloc(size, GFP_KERNEL);
if (!value) {
ret = -ENOMEM;
goto out;
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 9aba42b78..5fb60ea7e 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -97,7 +97,7 @@ static struct __btrfs_workqueue *
__btrfs_alloc_workqueue(const char *name, unsigned int flags, int limit_active,
int thresh)
{
- struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
+ struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return NULL;
@@ -148,7 +148,7 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
int limit_active,
int thresh)
{
- struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
+ struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return NULL;
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index e2f659dc5..f6dac40f8 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -520,13 +520,10 @@ static inline int ref_for_same_block(struct __prelim_ref *ref1,
static int __add_missing_keys(struct btrfs_fs_info *fs_info,
struct list_head *head)
{
- struct list_head *pos;
+ struct __prelim_ref *ref;
struct extent_buffer *eb;
- list_for_each(pos, head) {
- struct __prelim_ref *ref;
- ref = list_entry(pos, struct __prelim_ref, list);
-
+ list_for_each_entry(ref, head, list) {
if (ref->parent)
continue;
if (ref->key_for_search.type)
@@ -563,23 +560,15 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
*/
static void __merge_refs(struct list_head *head, int mode)
{
- struct list_head *pos1;
+ struct __prelim_ref *pos1;
- list_for_each(pos1, head) {
- struct list_head *n2;
- struct list_head *pos2;
- struct __prelim_ref *ref1;
+ list_for_each_entry(pos1, head, list) {
+ struct __prelim_ref *pos2 = pos1, *tmp;
- ref1 = list_entry(pos1, struct __prelim_ref, list);
-
- for (pos2 = pos1->next, n2 = pos2->next; pos2 != head;
- pos2 = n2, n2 = pos2->next) {
- struct __prelim_ref *ref2;
- struct __prelim_ref *xchg;
+ list_for_each_entry_safe_continue(pos2, tmp, head, list) {
+ struct __prelim_ref *xchg, *ref1 = pos1, *ref2 = pos2;
struct extent_inode_elem *eie;
- ref2 = list_entry(pos2, struct __prelim_ref, list);
-
if (!ref_for_same_block(ref1, ref2))
continue;
if (mode == 1) {
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 0ef5cc13f..61205e3bb 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -192,6 +192,10 @@ struct btrfs_inode {
/* File creation time. */
struct timespec i_otime;
+ /* Hook into fs_info->delayed_iputs */
+ struct list_head delayed_iput;
+ long delayed_iput_count;
+
struct inode vfs_inode;
};
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 0340c57bf..861d47256 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -531,13 +531,9 @@ static struct btrfsic_block *btrfsic_block_hashtable_lookup(
(((unsigned int)(dev_bytenr >> 16)) ^
((unsigned int)((uintptr_t)bdev))) &
(BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
- struct list_head *elem;
-
- list_for_each(elem, h->table + hashval) {
- struct btrfsic_block *const b =
- list_entry(elem, struct btrfsic_block,
- collision_resolving_node);
+ struct btrfsic_block *b;
+ list_for_each_entry(b, h->table + hashval, collision_resolving_node) {
if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr)
return b;
}
@@ -588,13 +584,9 @@ static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
((unsigned int)((uintptr_t)bdev_ref_to)) ^
((unsigned int)((uintptr_t)bdev_ref_from))) &
(BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
- struct list_head *elem;
-
- list_for_each(elem, h->table + hashval) {
- struct btrfsic_block_link *const l =
- list_entry(elem, struct btrfsic_block_link,
- collision_resolving_node);
+ struct btrfsic_block_link *l;
+ list_for_each_entry(l, h->table + hashval, collision_resolving_node) {
BUG_ON(NULL == l->block_ref_to);
BUG_ON(NULL == l->block_ref_from);
if (l->block_ref_to->dev_state->bdev == bdev_ref_to &&
@@ -639,13 +631,9 @@ static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
const unsigned int hashval =
(((unsigned int)((uintptr_t)bdev)) &
(BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
- struct list_head *elem;
-
- list_for_each(elem, h->table + hashval) {
- struct btrfsic_dev_state *const ds =
- list_entry(elem, struct btrfsic_dev_state,
- collision_resolving_node);
+ struct btrfsic_dev_state *ds;
+ list_for_each_entry(ds, h->table + hashval, collision_resolving_node) {
if (ds->bdev == bdev)
return ds;
}
@@ -1720,29 +1708,20 @@ static int btrfsic_read_block(struct btrfsic_state *state,
static void btrfsic_dump_database(struct btrfsic_state *state)
{
- struct list_head *elem_all;
+ const struct btrfsic_block *b_all;
BUG_ON(NULL == state);
printk(KERN_INFO "all_blocks_list:\n");
- list_for_each(elem_all, &state->all_blocks_list) {
- const struct btrfsic_block *const b_all =
- list_entry(elem_all, struct btrfsic_block,
- all_blocks_node);
- struct list_head *elem_ref_to;
- struct list_head *elem_ref_from;
+ list_for_each_entry(b_all, &state->all_blocks_list, all_blocks_node) {
+ const struct btrfsic_block_link *l;
printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
btrfsic_get_block_type(state, b_all),
b_all->logical_bytenr, b_all->dev_state->name,
b_all->dev_bytenr, b_all->mirror_num);
- list_for_each(elem_ref_to, &b_all->ref_to_list) {
- const struct btrfsic_block_link *const l =
- list_entry(elem_ref_to,
- struct btrfsic_block_link,
- node_ref_to);
-
+ list_for_each_entry(l, &b_all->ref_to_list, node_ref_to) {
printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
" refers %u* to"
" %c @%llu (%s/%llu/%d)\n",
@@ -1757,12 +1736,7 @@ static void btrfsic_dump_database(struct btrfsic_state *state)
l->block_ref_to->mirror_num);
}
- list_for_each(elem_ref_from, &b_all->ref_from_list) {
- const struct btrfsic_block_link *const l =
- list_entry(elem_ref_from,
- struct btrfsic_block_link,
- node_ref_from);
-
+ list_for_each_entry(l, &b_all->ref_from_list, node_ref_from) {
printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
" is ref %u* from"
" %c @%llu (%s/%llu/%d)\n",
@@ -1845,8 +1819,7 @@ again:
&state->block_hashtable);
if (NULL != block) {
u64 bytenr = 0;
- struct list_head *elem_ref_to;
- struct list_head *tmp_ref_to;
+ struct btrfsic_block_link *l, *tmp;
if (block->is_superblock) {
bytenr = btrfs_super_bytenr((struct btrfs_super_block *)
@@ -1967,13 +1940,8 @@ again:
* because it still carries valueable information
* like whether it was ever written and IO completed.
*/
- list_for_each_safe(elem_ref_to, tmp_ref_to,
- &block->ref_to_list) {
- struct btrfsic_block_link *const l =
- list_entry(elem_ref_to,
- struct btrfsic_block_link,
- node_ref_to);
-
+ list_for_each_entry_safe(l, tmp, &block->ref_to_list,
+ node_ref_to) {
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
btrfsic_print_rem_link(state, l);
l->ref_cnt--;
@@ -2436,7 +2404,7 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
struct btrfsic_block *const block,
int recursion_level)
{
- struct list_head *elem_ref_to;
+ const struct btrfsic_block_link *l;
int ret = 0;
if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
@@ -2464,11 +2432,7 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
* This algorithm is recursive because the amount of used stack
* space is very small and the max recursion depth is limited.
*/
- list_for_each(elem_ref_to, &block->ref_to_list) {
- const struct btrfsic_block_link *const l =
- list_entry(elem_ref_to, struct btrfsic_block_link,
- node_ref_to);
-
+ list_for_each_entry(l, &block->ref_to_list, node_ref_to) {
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
printk(KERN_INFO
"rl=%d, %c @%llu (%s/%llu/%d)"
@@ -2561,7 +2525,7 @@ static int btrfsic_is_block_ref_by_superblock(
const struct btrfsic_block *block,
int recursion_level)
{
- struct list_head *elem_ref_from;
+ const struct btrfsic_block_link *l;
if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
/* refer to comment at "abort cyclic linkage (case 1)" */
@@ -2576,11 +2540,7 @@ static int btrfsic_is_block_ref_by_superblock(
* This algorithm is recursive because the amount of used stack space
* is very small and the max recursion depth is limited.
*/
- list_for_each(elem_ref_from, &block->ref_from_list) {
- const struct btrfsic_block_link *const l =
- list_entry(elem_ref_from, struct btrfsic_block_link,
- node_ref_from);
-
+ list_for_each_entry(l, &block->ref_from_list, node_ref_from) {
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
printk(KERN_INFO
"rl=%d, %c @%llu (%s/%llu/%d)"
@@ -2669,7 +2629,7 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
const struct btrfsic_block *block,
int indent_level)
{
- struct list_head *elem_ref_to;
+ const struct btrfsic_block_link *l;
int indent_add;
static char buf[80];
int cursor_position;
@@ -2704,11 +2664,7 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
}
cursor_position = indent_level;
- list_for_each(elem_ref_to, &block->ref_to_list) {
- const struct btrfsic_block_link *const l =
- list_entry(elem_ref_to, struct btrfsic_block_link,
- node_ref_to);
-
+ list_for_each_entry(l, &block->ref_to_list, node_ref_to) {
while (cursor_position < indent_level) {
printk(" ");
cursor_position++;
@@ -3165,8 +3121,7 @@ int btrfsic_mount(struct btrfs_root *root,
void btrfsic_unmount(struct btrfs_root *root,
struct btrfs_fs_devices *fs_devices)
{
- struct list_head *elem_all;
- struct list_head *tmp_all;
+ struct btrfsic_block *b_all, *tmp_all;
struct btrfsic_state *state;
struct list_head *dev_head = &fs_devices->devices;
struct btrfs_device *device;
@@ -3206,20 +3161,12 @@ void btrfsic_unmount(struct btrfs_root *root,
* just free all memory that was allocated dynamically.
* Free the blocks and the block_links.
*/
- list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) {
- struct btrfsic_block *const b_all =
- list_entry(elem_all, struct btrfsic_block,
- all_blocks_node);
- struct list_head *elem_ref_to;
- struct list_head *tmp_ref_to;
-
- list_for_each_safe(elem_ref_to, tmp_ref_to,
- &b_all->ref_to_list) {
- struct btrfsic_block_link *const l =
- list_entry(elem_ref_to,
- struct btrfsic_block_link,
- node_ref_to);
+ list_for_each_entry_safe(b_all, tmp_all, &state->all_blocks_list,
+ all_blocks_node) {
+ struct btrfsic_block_link *l, *tmp;
+ list_for_each_entry_safe(l, tmp, &b_all->ref_to_list,
+ node_ref_to) {
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
btrfsic_print_rem_link(state, l);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index c473c42d7..3346cd8f9 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -637,11 +637,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
faili = nr_pages - 1;
cb->nr_pages = nr_pages;
- /* In the parent-locked case, we only locked the range we are
- * interested in. In all other cases, we can opportunistically
- * cache decompressed data that goes beyond the requested range. */
- if (!(bio_flags & EXTENT_BIO_PARENT_LOCKED))
- add_ra_bio_pages(inode, em_start + em_len, cb);
+ add_ra_bio_pages(inode, em_start + em_len, cb);
/* include any pages we added in add_ra-bio_pages */
uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 5b8e235c4..769e0ff1b 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1555,7 +1555,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
return 0;
}
- search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
+ search_start = buf->start & ~((u64)SZ_1G - 1);
if (parent)
btrfs_set_lock_blocking(parent);
@@ -2248,7 +2248,6 @@ static void reada_for_search(struct btrfs_root *root,
u64 target;
u64 nread = 0;
u64 gen;
- int direction = path->reada;
struct extent_buffer *eb;
u32 nr;
u32 blocksize;
@@ -2276,16 +2275,16 @@ static void reada_for_search(struct btrfs_root *root,
nr = slot;
while (1) {
- if (direction < 0) {
+ if (path->reada == READA_BACK) {
if (nr == 0)
break;
nr--;
- } else if (direction > 0) {
+ } else if (path->reada == READA_FORWARD) {
nr++;
if (nr >= nritems)
break;
}
- if (path->reada < 0 && objectid) {
+ if (path->reada == READA_BACK && objectid) {
btrfs_node_key(node, &disk_key, nr);
if (btrfs_disk_key_objectid(&disk_key) != objectid)
break;
@@ -2493,7 +2492,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
btrfs_set_path_blocking(p);
free_extent_buffer(tmp);
- if (p->reada)
+ if (p->reada != READA_NONE)
reada_for_search(root, p, level, slot, key->objectid);
btrfs_release_path(p);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 385b449fd..bfe4a337f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -35,6 +35,7 @@
#include <linux/btrfs.h>
#include <linux/workqueue.h>
#include <linux/security.h>
+#include <linux/sizes.h>
#include "extent_io.h"
#include "extent_map.h"
#include "async-thread.h"
@@ -96,6 +97,9 @@ struct btrfs_ordered_sum;
/* for storing items that use the BTRFS_UUID_KEY* types */
#define BTRFS_UUID_TREE_OBJECTID 9ULL
+/* tracks free space in block groups. */
+#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
+
/* for storing balance parameters in the root tree */
#define BTRFS_BALANCE_OBJECTID -4ULL
@@ -174,7 +178,7 @@ struct btrfs_ordered_sum;
/* csum types */
#define BTRFS_CSUM_TYPE_CRC32 0
-static int btrfs_csum_sizes[] = { 4 };
+static const int btrfs_csum_sizes[] = { 4 };
/* four bytes for CRC32 */
#define BTRFS_EMPTY_DIR_SIZE 0
@@ -196,9 +200,9 @@ static int btrfs_csum_sizes[] = { 4 };
/* ioprio of readahead is set to idle */
#define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0))
-#define BTRFS_DIRTY_METADATA_THRESH (32 * 1024 * 1024)
+#define BTRFS_DIRTY_METADATA_THRESH SZ_32M
-#define BTRFS_MAX_EXTENT_SIZE (128 * 1024 * 1024)
+#define BTRFS_MAX_EXTENT_SIZE SZ_128M
/*
* The key defines the order in the tree, and so it also defines (optimal)
@@ -500,6 +504,8 @@ struct btrfs_super_block {
* Compat flags that we support. If any incompat flags are set other than the
* ones specified below then we will fail to mount
*/
+#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE (1ULL << 0)
+
#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
@@ -526,7 +532,10 @@ struct btrfs_super_block {
#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
#define BTRFS_FEATURE_COMPAT_SAFE_SET 0ULL
#define BTRFS_FEATURE_COMPAT_SAFE_CLEAR 0ULL
-#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
+
+#define BTRFS_FEATURE_COMPAT_RO_SUPP \
+ (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)
+
#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL
@@ -590,14 +599,15 @@ struct btrfs_node {
* The slots array records the index of the item or block pointer
* used while walking the tree.
*/
+enum { READA_NONE = 0, READA_BACK, READA_FORWARD };
struct btrfs_path {
struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
int slots[BTRFS_MAX_LEVEL];
/* if there is real range locking, this locks field will change */
- int locks[BTRFS_MAX_LEVEL];
- int reada;
+ u8 locks[BTRFS_MAX_LEVEL];
+ u8 reada;
/* keep some upper locks as we walk down */
- int lowest_level;
+ u8 lowest_level;
/*
* set by btrfs_split_item, tells search_slot to keep all locks
@@ -1088,6 +1098,13 @@ struct btrfs_block_group_item {
__le64 flags;
} __attribute__ ((__packed__));
+struct btrfs_free_space_info {
+ __le32 extent_count;
+ __le32 flags;
+} __attribute__ ((__packed__));
+
+#define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0)
+
#define BTRFS_QGROUP_LEVEL_SHIFT 48
static inline u64 btrfs_qgroup_level(u64 qgroupid)
{
@@ -1296,6 +1313,9 @@ struct btrfs_caching_control {
atomic_t count;
};
+/* Once caching_thread() finds this much free space, it will wake up waiters. */
+#define CACHING_CTL_WAKE_UP (1024 * 1024 * 2)
+
struct btrfs_io_ctl {
void *cur, *orig;
struct page *page;
@@ -1321,8 +1341,20 @@ struct btrfs_block_group_cache {
u64 delalloc_bytes;
u64 bytes_super;
u64 flags;
- u64 sectorsize;
u64 cache_generation;
+ u32 sectorsize;
+
+ /*
+ * If the free space extent count exceeds this number, convert the block
+ * group to bitmaps.
+ */
+ u32 bitmap_high_thresh;
+
+ /*
+ * If the free space extent count drops below this number, convert the
+ * block group back to extents.
+ */
+ u32 bitmap_low_thresh;
/*
* It is just used for the delayed data space allocation because
@@ -1378,6 +1410,15 @@ struct btrfs_block_group_cache {
struct list_head io_list;
struct btrfs_io_ctl io_ctl;
+
+ /* Lock for free space tree operations. */
+ struct mutex free_space_lock;
+
+ /*
+ * Does the block group need to be added to the free space tree?
+ * Protected by free_space_lock.
+ */
+ int needs_free_space;
};
/* delayed seq elem */
@@ -1429,6 +1470,7 @@ struct btrfs_fs_info {
struct btrfs_root *csum_root;
struct btrfs_root *quota_root;
struct btrfs_root *uuid_root;
+ struct btrfs_root *free_space_root;
/* the log root tree is a directory of all the other log roots */
struct btrfs_root *log_root_tree;
@@ -1816,6 +1858,8 @@ struct btrfs_fs_info {
* and will be latter freed. Protected by fs_info->chunk_mutex.
*/
struct list_head pinned_chunks;
+
+ int creating_free_space_tree;
};
struct btrfs_subvolume_writers {
@@ -2092,6 +2136,27 @@ struct btrfs_ioctl_defrag_range_args {
*/
#define BTRFS_BLOCK_GROUP_ITEM_KEY 192
+/*
+ * Every block group is represented in the free space tree by a free space info
+ * item, which stores some accounting information. It is keyed on
+ * (block_group_start, FREE_SPACE_INFO, block_group_length).
+ */
+#define BTRFS_FREE_SPACE_INFO_KEY 198
+
+/*
+ * A free space extent tracks an extent of space that is free in a block group.
+ * It is keyed on (start, FREE_SPACE_EXTENT, length).
+ */
+#define BTRFS_FREE_SPACE_EXTENT_KEY 199
+
+/*
+ * When a block group becomes very fragmented, we convert it to use bitmaps
+ * instead of extents. A free space bitmap is keyed on
+ * (start, FREE_SPACE_BITMAP, length); the corresponding item is a bitmap with
+ * (length / sectorsize) bits.
+ */
+#define BTRFS_FREE_SPACE_BITMAP_KEY 200
+
#define BTRFS_DEV_EXTENT_KEY 204
#define BTRFS_DEV_ITEM_KEY 216
#define BTRFS_CHUNK_ITEM_KEY 228
@@ -2184,6 +2249,7 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_MOUNT_RESCAN_UUID_TREE (1 << 23)
#define BTRFS_MOUNT_FRAGMENT_DATA (1 << 24)
#define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25)
+#define BTRFS_MOUNT_FREE_SPACE_TREE (1 << 26)
#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
#define BTRFS_DEFAULT_MAX_INLINE (8192)
@@ -2506,6 +2572,11 @@ BTRFS_SETGET_FUNCS(disk_block_group_flags,
BTRFS_SETGET_STACK_FUNCS(block_group_flags,
struct btrfs_block_group_item, flags, 64);
+/* struct btrfs_free_space_info */
+BTRFS_SETGET_FUNCS(free_space_extent_count, struct btrfs_free_space_info,
+ extent_count, 32);
+BTRFS_SETGET_FUNCS(free_space_flags, struct btrfs_free_space_info, flags, 32);
+
/* struct btrfs_inode_ref */
BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64);
@@ -3570,9 +3641,13 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
int __get_raid_index(u64 flags);
int btrfs_start_write_no_snapshoting(struct btrfs_root *root);
void btrfs_end_write_no_snapshoting(struct btrfs_root *root);
+void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
void check_system_chunk(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const u64 type);
+u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
+ struct btrfs_fs_info *info, u64 start, u64 end);
+
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
@@ -3737,6 +3812,7 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
kfree(fs_info->csum_root);
kfree(fs_info->quota_root);
kfree(fs_info->uuid_root);
+ kfree(fs_info->free_space_root);
kfree(fs_info->super_copy);
kfree(fs_info->super_for_commit);
security_free_mnt_opts(&fs_info->security_opts);
@@ -3906,7 +3982,6 @@ void btrfs_extent_item_to_extent_map(struct inode *inode,
/* inode.c */
struct btrfs_delalloc_work {
struct inode *inode;
- int wait;
int delay_iput;
struct completion completion;
struct list_head list;
@@ -3914,7 +3989,7 @@ struct btrfs_delalloc_work {
};
struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
- int wait, int delay_iput);
+ int delay_iput);
void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);
struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
@@ -4024,7 +4099,8 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
struct btrfs_ioctl_space_info *space);
void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
struct btrfs_ioctl_balance_args *bargs);
-
+ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
+ struct file *dst_file, u64 dst_loff);
/* file.c */
int btrfs_auto_defrag_init(void);
@@ -4055,6 +4131,11 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
loff_t pos, size_t write_bytes,
struct extent_state **cached);
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
+ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t len, unsigned int flags);
+int btrfs_clone_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out, u64 len);
/* tree-defrag.c */
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
@@ -4247,16 +4328,98 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info,
}
}
+#define btrfs_clear_fs_incompat(__fs_info, opt) \
+ __btrfs_clear_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
+
+static inline void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info,
+ u64 flag)
+{
+ struct btrfs_super_block *disk_super;
+ u64 features;
+
+ disk_super = fs_info->super_copy;
+ features = btrfs_super_incompat_flags(disk_super);
+ if (features & flag) {
+ spin_lock(&fs_info->super_lock);
+ features = btrfs_super_incompat_flags(disk_super);
+ if (features & flag) {
+ features &= ~flag;
+ btrfs_set_super_incompat_flags(disk_super, features);
+ btrfs_info(fs_info, "clearing %llu feature flag",
+ flag);
+ }
+ spin_unlock(&fs_info->super_lock);
+ }
+}
+
#define btrfs_fs_incompat(fs_info, opt) \
__btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
-static inline int __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
+static inline bool __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
{
struct btrfs_super_block *disk_super;
disk_super = fs_info->super_copy;
return !!(btrfs_super_incompat_flags(disk_super) & flag);
}
+#define btrfs_set_fs_compat_ro(__fs_info, opt) \
+ __btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
+
+static inline void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info,
+ u64 flag)
+{
+ struct btrfs_super_block *disk_super;
+ u64 features;
+
+ disk_super = fs_info->super_copy;
+ features = btrfs_super_compat_ro_flags(disk_super);
+ if (!(features & flag)) {
+ spin_lock(&fs_info->super_lock);
+ features = btrfs_super_compat_ro_flags(disk_super);
+ if (!(features & flag)) {
+ features |= flag;
+ btrfs_set_super_compat_ro_flags(disk_super, features);
+ btrfs_info(fs_info, "setting %llu ro feature flag",
+ flag);
+ }
+ spin_unlock(&fs_info->super_lock);
+ }
+}
+
+#define btrfs_clear_fs_compat_ro(__fs_info, opt) \
+ __btrfs_clear_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
+
+static inline void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info,
+ u64 flag)
+{
+ struct btrfs_super_block *disk_super;
+ u64 features;
+
+ disk_super = fs_info->super_copy;
+ features = btrfs_super_compat_ro_flags(disk_super);
+ if (features & flag) {
+ spin_lock(&fs_info->super_lock);
+ features = btrfs_super_compat_ro_flags(disk_super);
+ if (features & flag) {
+ features &= ~flag;
+ btrfs_set_super_compat_ro_flags(disk_super, features);
+ btrfs_info(fs_info, "clearing %llu ro feature flag",
+ flag);
+ }
+ spin_unlock(&fs_info->super_lock);
+ }
+}
+
+#define btrfs_fs_compat_ro(fs_info, opt) \
+ __btrfs_fs_compat_ro((fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
+
+static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag)
+{
+ struct btrfs_super_block *disk_super;
+ disk_super = fs_info->super_copy;
+ return !!(btrfs_super_compat_ro_flags(disk_super) & flag);
+}
+
/*
* Call btrfs_abort_transaction as early as possible when an error condition is
* detected, that way the exact line number is reported.
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 02b934d0e..b57daa895 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -54,16 +54,11 @@ static inline void btrfs_init_delayed_node(
delayed_node->root = root;
delayed_node->inode_id = inode_id;
atomic_set(&delayed_node->refs, 0);
- delayed_node->count = 0;
- delayed_node->flags = 0;
delayed_node->ins_root = RB_ROOT;
delayed_node->del_root = RB_ROOT;
mutex_init(&delayed_node->mutex);
- delayed_node->index_cnt = 0;
INIT_LIST_HEAD(&delayed_node->n_list);
INIT_LIST_HEAD(&delayed_node->p_list);
- delayed_node->bytes_reserved = 0;
- memset(&delayed_node->inode_item, 0, sizeof(delayed_node->inode_item));
}
static inline int btrfs_is_continuous_delayed_item(
@@ -132,7 +127,7 @@ again:
if (node)
return node;
- node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
+ node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
if (!node)
return ERR_PTR(-ENOMEM);
btrfs_init_delayed_node(node, root, ino);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index e06dd75ad..914ac13bd 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -493,12 +493,12 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
memcpy(&existing_ref->extent_op->key,
&ref->extent_op->key,
sizeof(ref->extent_op->key));
- existing_ref->extent_op->update_key = 1;
+ existing_ref->extent_op->update_key = true;
}
if (ref->extent_op->update_flags) {
existing_ref->extent_op->flags_to_set |=
ref->extent_op->flags_to_set;
- existing_ref->extent_op->update_flags = 1;
+ existing_ref->extent_op->update_flags = true;
}
btrfs_free_delayed_extent_op(ref->extent_op);
}
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 00ed02cbf..c24b653c7 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -75,11 +75,11 @@ struct btrfs_delayed_ref_node {
struct btrfs_delayed_extent_op {
struct btrfs_disk_key key;
+ u8 level;
+ bool update_key;
+ bool update_flags;
+ bool is_data;
u64 flags_to_set;
- int level;
- unsigned int update_key:1;
- unsigned int update_flags:1;
- unsigned int is_data:1;
};
/*
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 1e668fb7d..cbb7dbfb3 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -614,7 +614,7 @@ static void btrfs_dev_replace_update_device_in_mapping_tree(
em = lookup_extent_mapping(em_tree, start, (u64)-1);
if (!em)
break;
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++)
if (srcdev == map->stripes[i].dev)
map->stripes[i].dev = tgtdev;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 41fb43183..4545e2e2a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -42,6 +42,7 @@
#include "locking.h"
#include "tree-log.h"
#include "free-space-cache.h"
+#include "free-space-tree.h"
#include "inode-map.h"
#include "check-integrity.h"
#include "rcu-string.h"
@@ -54,6 +55,12 @@
#include <asm/cpufeature.h>
#endif
+#define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\
+ BTRFS_HEADER_FLAG_RELOC |\
+ BTRFS_SUPER_FLAG_ERROR |\
+ BTRFS_SUPER_FLAG_SEEDING |\
+ BTRFS_SUPER_FLAG_METADUMP)
+
static const struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);
@@ -175,6 +182,7 @@ static struct btrfs_lockdep_keyset {
{ .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
{ .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" },
+ { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, .name_stem = "free-space" },
{ .id = 0, .name_stem = "tree" },
};
@@ -362,7 +370,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
}
lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
- 0, &cached_state);
+ &cached_state);
if (extent_buffer_uptodate(eb) &&
btrfs_header_generation(eb) == parent_transid) {
ret = 0;
@@ -923,7 +931,7 @@ static int check_async_write(struct inode *inode, unsigned long bio_flags)
if (bio_flags & EXTENT_BIO_TREE_LOG)
return 0;
#ifdef CONFIG_X86
- if (cpu_has_xmm4_2)
+ if (static_cpu_has_safe(X86_FEATURE_XMM4_2))
return 0;
#endif
return 1;
@@ -1665,6 +1673,9 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
if (location->objectid == BTRFS_UUID_TREE_OBJECTID)
return fs_info->uuid_root ? fs_info->uuid_root :
ERR_PTR(-ENOENT);
+ if (location->objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
+ return fs_info->free_space_root ? fs_info->free_space_root :
+ ERR_PTR(-ENOENT);
again:
root = btrfs_lookup_fs_root(fs_info, location->objectid);
if (root) {
@@ -2165,6 +2176,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
free_root_extent_buffers(info->uuid_root);
if (chunk_root)
free_root_extent_buffers(info->chunk_root);
+ free_root_extent_buffers(info->free_space_root);
}
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
@@ -2465,6 +2477,15 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info,
fs_info->uuid_root = root;
}
+ if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
+ location.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID;
+ root = btrfs_read_tree_root(tree_root, &location);
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+ set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+ fs_info->free_space_root = root;
+ }
+
return 0;
}
@@ -2745,26 +2766,6 @@ int open_ctree(struct super_block *sb,
goto fail_alloc;
}
- /*
- * Leafsize and nodesize were always equal, this is only a sanity check.
- */
- if (le32_to_cpu(disk_super->__unused_leafsize) !=
- btrfs_super_nodesize(disk_super)) {
- printk(KERN_ERR "BTRFS: couldn't mount because metadata "
- "blocksizes don't match. node %d leaf %d\n",
- btrfs_super_nodesize(disk_super),
- le32_to_cpu(disk_super->__unused_leafsize));
- err = -EINVAL;
- goto fail_alloc;
- }
- if (btrfs_super_nodesize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) {
- printk(KERN_ERR "BTRFS: couldn't mount because metadata "
- "blocksize (%d) was too large\n",
- btrfs_super_nodesize(disk_super));
- err = -EINVAL;
- goto fail_alloc;
- }
-
features = btrfs_super_incompat_flags(disk_super);
features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
@@ -2827,7 +2828,7 @@ int open_ctree(struct super_block *sb,
fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
- 4 * 1024 * 1024 / PAGE_CACHE_SIZE);
+ SZ_4M / PAGE_CACHE_SIZE);
tree_root->nodesize = nodesize;
tree_root->sectorsize = sectorsize;
@@ -2836,17 +2837,6 @@ int open_ctree(struct super_block *sb,
sb->s_blocksize = sectorsize;
sb->s_blocksize_bits = blksize_bits(sectorsize);
- if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
- printk(KERN_ERR "BTRFS: valid FS not found on %s\n", sb->s_id);
- goto fail_sb_buffer;
- }
-
- if (sectorsize != PAGE_SIZE) {
- printk(KERN_ERR "BTRFS: incompatible sector size (%lu) "
- "found on %s\n", (unsigned long)sectorsize, sb->s_id);
- goto fail_sb_buffer;
- }
-
mutex_lock(&fs_info->chunk_mutex);
ret = btrfs_read_sys_array(tree_root);
mutex_unlock(&fs_info->chunk_mutex);
@@ -3081,6 +3071,18 @@ retry_root_backup:
if (sb->s_flags & MS_RDONLY)
return 0;
+ if (btrfs_test_opt(tree_root, FREE_SPACE_TREE) &&
+ !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
+ pr_info("BTRFS: creating free space tree\n");
+ ret = btrfs_create_free_space_tree(fs_info);
+ if (ret) {
+ pr_warn("BTRFS: failed to create free space tree %d\n",
+ ret);
+ close_ctree(tree_root);
+ return ret;
+ }
+ }
+
down_read(&fs_info->cleanup_work_sem);
if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) ||
(ret = btrfs_orphan_cleanup(fs_info->tree_root))) {
@@ -3106,6 +3108,18 @@ retry_root_backup:
btrfs_qgroup_rescan_resume(fs_info);
+ if (btrfs_test_opt(tree_root, CLEAR_CACHE) &&
+ btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
+ pr_info("BTRFS: clearing free space tree\n");
+ ret = btrfs_clear_free_space_tree(fs_info);
+ if (ret) {
+ pr_warn("BTRFS: failed to clear free space tree %d\n",
+ ret);
+ close_ctree(tree_root);
+ return ret;
+ }
+ }
+
if (!fs_info->uuid_root) {
pr_info("BTRFS: creating UUID tree\n");
ret = btrfs_create_uuid_tree(fs_info);
@@ -3932,11 +3946,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
return !ret;
}
-int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
-{
- return set_extent_buffer_uptodate(buf);
-}
-
void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
{
struct btrfs_root *root;
@@ -3992,7 +4001,6 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
balance_dirty_pages_ratelimited(
root->fs_info->btree_inode->i_mapping);
}
- return;
}
void btrfs_btree_balance_dirty(struct btrfs_root *root)
@@ -4015,8 +4023,17 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
int read_only)
{
struct btrfs_super_block *sb = fs_info->super_copy;
+ u64 nodesize = btrfs_super_nodesize(sb);
+ u64 sectorsize = btrfs_super_sectorsize(sb);
int ret = 0;
+ if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
+ printk(KERN_ERR "BTRFS: no valid FS found\n");
+ ret = -EINVAL;
+ }
+ if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP)
+ printk(KERN_WARNING "BTRFS: unrecognized super flag: %llu\n",
+ btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
printk(KERN_ERR "BTRFS: tree_root level too big: %d >= %d\n",
btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
@@ -4034,31 +4051,46 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
}
/*
- * The common minimum, we don't know if we can trust the nodesize/sectorsize
- * items yet, they'll be verified later. Issue just a warning.
+ * Check sectorsize and nodesize first, other check will need it.
+ * Check all possible sectorsize(4K, 8K, 16K, 32K, 64K) here.
*/
- if (!IS_ALIGNED(btrfs_super_root(sb), 4096))
+ if (!is_power_of_2(sectorsize) || sectorsize < 4096 ||
+ sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+ printk(KERN_ERR "BTRFS: invalid sectorsize %llu\n", sectorsize);
+ ret = -EINVAL;
+ }
+ /* Only PAGE SIZE is supported yet */
+ if (sectorsize != PAGE_CACHE_SIZE) {
+ printk(KERN_ERR "BTRFS: sectorsize %llu not supported yet, only support %lu\n",
+ sectorsize, PAGE_CACHE_SIZE);
+ ret = -EINVAL;
+ }
+ if (!is_power_of_2(nodesize) || nodesize < sectorsize ||
+ nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+ printk(KERN_ERR "BTRFS: invalid nodesize %llu\n", nodesize);
+ ret = -EINVAL;
+ }
+ if (nodesize != le32_to_cpu(sb->__unused_leafsize)) {
+ printk(KERN_ERR "BTRFS: invalid leafsize %u, should be %llu\n",
+ le32_to_cpu(sb->__unused_leafsize),
+ nodesize);
+ ret = -EINVAL;
+ }
+
+ /* Root alignment check */
+ if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) {
printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n",
btrfs_super_root(sb));
- if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096))
+ ret = -EINVAL;
+ }
+ if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) {
printk(KERN_WARNING "BTRFS: chunk_root block unaligned: %llu\n",
btrfs_super_chunk_root(sb));
- if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096))
- printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
- btrfs_super_log_root(sb));
-
- /*
- * Check the lower bound, the alignment and other constraints are
- * checked later.
- */
- if (btrfs_super_nodesize(sb) < 4096) {
- printk(KERN_ERR "BTRFS: nodesize too small: %u < 4096\n",
- btrfs_super_nodesize(sb));
ret = -EINVAL;
}
- if (btrfs_super_sectorsize(sb) < 4096) {
- printk(KERN_ERR "BTRFS: sectorsize too small: %u < 4096\n",
- btrfs_super_sectorsize(sb));
+ if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) {
+ printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
+ btrfs_super_log_root(sb));
ret = -EINVAL;
}
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index adeb31830..8e79d0070 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -19,7 +19,7 @@
#ifndef __DISKIO__
#define __DISKIO__
-#define BTRFS_SUPER_INFO_OFFSET (64 * 1024)
+#define BTRFS_SUPER_INFO_OFFSET SZ_64K
#define BTRFS_SUPER_INFO_SIZE 4096
#define BTRFS_SUPER_MIRROR_MAX 3
@@ -35,7 +35,7 @@ enum btrfs_wq_endio_type {
static inline u64 btrfs_sb_offset(int mirror)
{
- u64 start = 16 * 1024;
+ u64 start = SZ_16K;
if (mirror)
return start << (BTRFS_SUPER_MIRROR_SHIFT * mirror);
return BTRFS_SUPER_INFO_OFFSET;
@@ -116,7 +116,6 @@ static inline void btrfs_put_fs_root(struct btrfs_root *root)
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int atomic);
-int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
u32 btrfs_csum_data(char *data, u32 seed, size_t len);
void btrfs_csum_final(u32 crc, char *result);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2368cac11..e2287c7c1 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -33,6 +33,7 @@
#include "raid56.h"
#include "locking.h"
#include "free-space-cache.h"
+#include "free-space-tree.h"
#include "math.h"
#include "sysfs.h"
#include "qgroup.h"
@@ -357,8 +358,8 @@ static void fragment_free_space(struct btrfs_root *root,
* we need to check the pinned_extents for any extents that can't be used yet
* since their free space will be released as soon as the transaction commits.
*/
-static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
- struct btrfs_fs_info *info, u64 start, u64 end)
+u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
+ struct btrfs_fs_info *info, u64 start, u64 end)
{
u64 extent_start, extent_end, size, total_added = 0;
int ret;
@@ -395,11 +396,10 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
return total_added;
}
-static noinline void caching_thread(struct btrfs_work *work)
+static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
{
struct btrfs_block_group_cache *block_group;
struct btrfs_fs_info *fs_info;
- struct btrfs_caching_control *caching_ctl;
struct btrfs_root *extent_root;
struct btrfs_path *path;
struct extent_buffer *leaf;
@@ -407,17 +407,16 @@ static noinline void caching_thread(struct btrfs_work *work)
u64 total_found = 0;
u64 last = 0;
u32 nritems;
- int ret = -ENOMEM;
+ int ret;
bool wakeup = true;
- caching_ctl = container_of(work, struct btrfs_caching_control, work);
block_group = caching_ctl->block_group;
fs_info = block_group->fs_info;
extent_root = fs_info->extent_root;
path = btrfs_alloc_path();
if (!path)
- goto out;
+ return -ENOMEM;
last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
@@ -438,20 +437,16 @@ static noinline void caching_thread(struct btrfs_work *work)
*/
path->skip_locking = 1;
path->search_commit_root = 1;
- path->reada = 1;
+ path->reada = READA_FORWARD;
key.objectid = last;
key.offset = 0;
key.type = BTRFS_EXTENT_ITEM_KEY;
-again:
- mutex_lock(&caching_ctl->mutex);
- /* need to make sure the commit_root doesn't disappear */
- down_read(&fs_info->commit_root_sem);
next:
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
- goto err;
+ goto out;
leaf = path->nodes[0];
nritems = btrfs_header_nritems(leaf);
@@ -477,12 +472,14 @@ next:
up_read(&fs_info->commit_root_sem);
mutex_unlock(&caching_ctl->mutex);
cond_resched();
- goto again;
+ mutex_lock(&caching_ctl->mutex);
+ down_read(&fs_info->commit_root_sem);
+ goto next;
}
ret = btrfs_next_leaf(extent_root, path);
if (ret < 0)
- goto err;
+ goto out;
if (ret)
break;
leaf = path->nodes[0];
@@ -521,7 +518,7 @@ next:
else
last = key.objectid + key.offset;
- if (total_found > (1024 * 1024 * 2)) {
+ if (total_found > CACHING_CTL_WAKE_UP) {
total_found = 0;
if (wakeup)
wake_up(&caching_ctl->wait);
@@ -534,9 +531,37 @@ next:
total_found += add_new_free_space(block_group, fs_info, last,
block_group->key.objectid +
block_group->key.offset);
+ caching_ctl->progress = (u64)-1;
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+static noinline void caching_thread(struct btrfs_work *work)
+{
+ struct btrfs_block_group_cache *block_group;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_caching_control *caching_ctl;
+ struct btrfs_root *extent_root;
+ int ret;
+
+ caching_ctl = container_of(work, struct btrfs_caching_control, work);
+ block_group = caching_ctl->block_group;
+ fs_info = block_group->fs_info;
+ extent_root = fs_info->extent_root;
+
+ mutex_lock(&caching_ctl->mutex);
+ down_read(&fs_info->commit_root_sem);
+
+ if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+ ret = load_free_space_tree(caching_ctl);
+ else
+ ret = load_extent_tree_free(caching_ctl);
+
spin_lock(&block_group->lock);
block_group->caching_ctl = NULL;
- block_group->cached = BTRFS_CACHE_FINISHED;
+ block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED;
spin_unlock(&block_group->lock);
#ifdef CONFIG_BTRFS_DEBUG
@@ -555,20 +580,11 @@ next:
#endif
caching_ctl->progress = (u64)-1;
-err:
- btrfs_free_path(path);
- up_read(&fs_info->commit_root_sem);
-
- free_excluded_extents(extent_root, block_group);
+ up_read(&fs_info->commit_root_sem);
+ free_excluded_extents(fs_info->extent_root, block_group);
mutex_unlock(&caching_ctl->mutex);
-out:
- if (ret) {
- spin_lock(&block_group->lock);
- block_group->caching_ctl = NULL;
- block_group->cached = BTRFS_CACHE_ERROR;
- spin_unlock(&block_group->lock);
- }
+
wake_up(&caching_ctl->wait);
put_caching_control(caching_ctl);
@@ -680,8 +696,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
}
} else {
/*
- * We are not going to do the fast caching, set cached to the
- * appropriate value and wakeup any waiters.
+ * We're either using the free space tree or no caching at all.
+ * Set cached to the appropriate value and wakeup any waiters.
*/
spin_lock(&cache->lock);
if (load_cache_only) {
@@ -2115,7 +2131,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
- path->reada = 1;
+ path->reada = READA_FORWARD;
path->leave_spinning = 1;
/* this will setup the path even if it fails to insert the back ref */
ret = insert_inline_extent_backref(trans, fs_info->extent_root, path,
@@ -2141,7 +2157,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
- path->reada = 1;
+ path->reada = READA_FORWARD;
path->leave_spinning = 1;
/* now insert the actual backref */
ret = insert_extent_backref(trans, root->fs_info->extent_root,
@@ -2254,7 +2270,7 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
}
again:
- path->reada = 1;
+ path->reada = READA_FORWARD;
path->leave_spinning = 1;
ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
path, 0, 1);
@@ -2910,6 +2926,9 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
if (trans->aborted)
return 0;
+ if (root->fs_info->creating_free_space_tree)
+ return 0;
+
if (root == root->fs_info->extent_root)
root = root->fs_info->tree_root;
@@ -2988,9 +3007,9 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
return -ENOMEM;
extent_op->flags_to_set = flags;
- extent_op->update_flags = 1;
- extent_op->update_key = 0;
- extent_op->is_data = is_data ? 1 : 0;
+ extent_op->update_flags = true;
+ extent_op->update_key = false;
+ extent_op->is_data = is_data ? true : false;
extent_op->level = level;
ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
@@ -3328,7 +3347,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
* If this block group is smaller than 100 megs don't bother caching the
* block group.
*/
- if (block_group->key.offset < (100 * 1024 * 1024)) {
+ if (block_group->key.offset < (100 * SZ_1M)) {
spin_lock(&block_group->lock);
block_group->disk_cache_state = BTRFS_DC_WRITTEN;
spin_unlock(&block_group->lock);
@@ -3428,7 +3447,7 @@ again:
* taking up quite a bit since it's not folded into the other space
* cache.
*/
- num_pages = div_u64(block_group->key.offset, 256 * 1024 * 1024);
+ num_pages = div_u64(block_group->key.offset, SZ_256M);
if (!num_pages)
num_pages = 1;
@@ -3684,11 +3703,21 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
return -ENOMEM;
/*
- * We don't need the lock here since we are protected by the transaction
- * commit. We want to do the cache_save_setup first and then run the
+ * Even though we are in the critical section of the transaction commit,
+ * we can still have concurrent tasks adding elements to this
+ * transaction's list of dirty block groups. These tasks correspond to
+ * endio free space workers started when writeback finishes for a
+ * space cache, which run inode.c:btrfs_finish_ordered_io(), and can
+ * allocate new block groups as a result of COWing nodes of the root
+ * tree when updating the free space inode. The writeback for the space
+ * caches is triggered by an earlier call to
+ * btrfs_start_dirty_block_groups() and iterations of the following
+ * loop.
+ * Also we want to do the cache_save_setup first and then run the
* delayed refs to make sure we have the best chance at doing this all
* in one shot.
*/
+ spin_lock(&cur_trans->dirty_bgs_lock);
while (!list_empty(&cur_trans->dirty_bgs)) {
cache = list_first_entry(&cur_trans->dirty_bgs,
struct btrfs_block_group_cache,
@@ -3700,11 +3729,13 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
* finish and then do it all again
*/
if (!list_empty(&cache->io_list)) {
+ spin_unlock(&cur_trans->dirty_bgs_lock);
list_del_init(&cache->io_list);
btrfs_wait_cache_io(root, trans, cache,
&cache->io_ctl, path,
cache->key.objectid);
btrfs_put_block_group(cache);
+ spin_lock(&cur_trans->dirty_bgs_lock);
}
/*
@@ -3712,6 +3743,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
* on any pending IO
*/
list_del_init(&cache->dirty_list);
+ spin_unlock(&cur_trans->dirty_bgs_lock);
should_put = 1;
cache_save_setup(cache, trans, path);
@@ -3736,6 +3768,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
}
if (!ret) {
ret = write_one_cache_group(trans, root, path, cache);
+ /*
+ * One of the free space endio workers might have
+ * created a new block group while updating a free space
+ * cache's inode (at inode.c:btrfs_finish_ordered_io())
+ * and hasn't released its transaction handle yet, in
+ * which case the new block group is still attached to
+ * its transaction handle and its creation has not
+ * finished yet (no block group item in the extent tree
+ * yet, etc). If this is the case, wait for all free
+ * space endio workers to finish and retry. This is a
+ * very rare case so no need for a more efficient and
+ * complex approach.
+ */
+ if (ret == -ENOENT) {
+ wait_event(cur_trans->writer_wait,
+ atomic_read(&cur_trans->num_writers) == 1);
+ ret = write_one_cache_group(trans, root, path,
+ cache);
+ }
if (ret)
btrfs_abort_transaction(trans, root, ret);
}
@@ -3743,7 +3794,9 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
/* if its not on the io list, we need to put the block group */
if (should_put)
btrfs_put_block_group(cache);
+ spin_lock(&cur_trans->dirty_bgs_lock);
}
+ spin_unlock(&cur_trans->dirty_bgs_lock);
while (!list_empty(io)) {
cache = list_first_entry(io, struct btrfs_block_group_cache,
@@ -4242,14 +4295,13 @@ static int should_alloc_chunk(struct btrfs_root *root,
*/
if (force == CHUNK_ALLOC_LIMITED) {
thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
- thresh = max_t(u64, 64 * 1024 * 1024,
- div_factor_fine(thresh, 1));
+ thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));
if (num_bytes - num_allocated < thresh)
return 1;
}
- if (num_allocated + 2 * 1024 * 1024 < div_factor(num_bytes, 8))
+ if (num_allocated + SZ_2M < div_factor(num_bytes, 8))
return 0;
return 1;
}
@@ -4449,7 +4501,7 @@ out:
* transaction.
*/
if (trans->can_flush_pending_bgs &&
- trans->chunk_bytes_reserved >= (2 * 1024 * 1024ull)) {
+ trans->chunk_bytes_reserved >= (u64)SZ_2M) {
btrfs_create_pending_block_groups(trans, trans->root);
btrfs_trans_release_chunk_metadata(trans);
}
@@ -4547,7 +4599,7 @@ static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim)
return nr;
}
-#define EXTENT_SIZE_PER_ITEM (256 * 1024)
+#define EXTENT_SIZE_PER_ITEM SZ_256K
/*
* shrink metadata reservation for delalloc
@@ -4752,8 +4804,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
u64 expected;
u64 to_reclaim;
- to_reclaim = min_t(u64, num_online_cpus() * 1024 * 1024,
- 16 * 1024 * 1024);
+ to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
spin_lock(&space_info->lock);
if (can_overcommit(root, space_info, to_reclaim,
BTRFS_RESERVE_FLUSH_ALL)) {
@@ -4764,8 +4815,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly +
space_info->bytes_may_use;
- if (can_overcommit(root, space_info, 1024 * 1024,
- BTRFS_RESERVE_FLUSH_ALL))
+ if (can_overcommit(root, space_info, SZ_1M, BTRFS_RESERVE_FLUSH_ALL))
expected = div_factor_fine(space_info->total_bytes, 95);
else
expected = div_factor_fine(space_info->total_bytes, 90);
@@ -5321,7 +5371,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
spin_lock(&sinfo->lock);
spin_lock(&block_rsv->lock);
- block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024);
+ block_rsv->size = min_t(u64, num_bytes, SZ_512M);
num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
sinfo->bytes_reserved + sinfo->bytes_readonly +
@@ -6225,11 +6275,11 @@ fetch_cluster_info(struct btrfs_root *root, struct btrfs_space_info *space_info,
return ret;
if (ssd)
- *empty_cluster = 2 * 1024 * 1024;
+ *empty_cluster = SZ_2M;
if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
ret = &root->fs_info->meta_alloc_cluster;
if (!ssd)
- *empty_cluster = 64 * 1024;
+ *empty_cluster = SZ_64K;
} else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) && ssd) {
ret = &root->fs_info->data_alloc_cluster;
}
@@ -6441,7 +6491,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
- path->reada = 1;
+ path->reada = READA_FORWARD;
path->leave_spinning = 1;
is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
@@ -6664,6 +6714,13 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
}
+ ret = add_to_free_space_tree(trans, root->fs_info, bytenr,
+ num_bytes);
+ if (ret) {
+ btrfs_abort_transaction(trans, extent_root, ret);
+ goto out;
+ }
+
ret = update_block_group(trans, root, bytenr, num_bytes, 0);
if (ret) {
btrfs_abort_transaction(trans, extent_root, ret);
@@ -7675,6 +7732,11 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path);
+ ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
+ ins->offset);
+ if (ret)
+ return ret;
+
ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
if (ret) { /* -ENOENT, logic error */
btrfs_err(fs_info, "update block group failed for %llu %llu",
@@ -7755,6 +7817,11 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_free_path(path);
+ ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
+ num_bytes);
+ if (ret)
+ return ret;
+
ret = update_block_group(trans, root, ins->objectid, root->nodesize,
1);
if (ret) { /* -ENOENT, logic error */
@@ -7837,7 +7904,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
btrfs_set_lock_blocking(buf);
- btrfs_set_buffer_uptodate(buf);
+ set_extent_buffer_uptodate(buf);
if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
buf->log_index = root->log_transid % 2;
@@ -7983,12 +8050,9 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
else
memset(&extent_op->key, 0, sizeof(extent_op->key));
extent_op->flags_to_set = flags;
- if (skinny_metadata)
- extent_op->update_key = 0;
- else
- extent_op->update_key = 1;
- extent_op->update_flags = 1;
- extent_op->is_data = 0;
+ extent_op->update_key = skinny_metadata ? false : true;
+ extent_op->update_flags = true;
+ extent_op->is_data = false;
extent_op->level = level;
ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
@@ -9127,7 +9191,7 @@ static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
if ((sinfo->flags &
(BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
!force)
- min_allocable_bytes = 1 * 1024 * 1024;
+ min_allocable_bytes = SZ_1M;
else
min_allocable_bytes = 0;
@@ -9659,6 +9723,8 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
cache->full_stripe_len = btrfs_full_stripe_len(root,
&root->fs_info->mapping_tree,
start);
+ set_free_space_tree_thresholds(cache);
+
atomic_set(&cache->count, 1);
spin_lock_init(&cache->lock);
init_rwsem(&cache->data_rwsem);
@@ -9670,6 +9736,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
INIT_LIST_HEAD(&cache->io_list);
btrfs_init_free_space_ctl(cache);
atomic_set(&cache->trimming, 0);
+ mutex_init(&cache->free_space_lock);
return cache;
}
@@ -9694,7 +9761,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- path->reada = 1;
+ path->reada = READA_FORWARD;
cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
if (btrfs_test_opt(root, SPACE_CACHE) &&
@@ -9880,6 +9947,8 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
key.objectid, key.offset);
if (ret)
btrfs_abort_transaction(trans, extent_root, ret);
+ add_block_group_free_space(trans, root->fs_info, block_group);
+ /* already aborted the transaction if it failed. */
next:
list_del_init(&block_group->bg_list);
}
@@ -9910,6 +9979,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
cache->flags = type;
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
+ cache->needs_free_space = 1;
ret = exclude_super_stripes(root, cache);
if (ret) {
/*
@@ -10280,6 +10350,10 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
unlock_chunks(root);
+ ret = remove_block_group_free_space(trans, root->fs_info, block_group);
+ if (ret)
+ goto out;
+
btrfs_put_block_group(block_group);
btrfs_put_block_group(block_group);
@@ -10328,7 +10402,7 @@ btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
* more device items and remove one chunk item), but this is done at
* btrfs_remove_chunk() through a call to check_system_chunk().
*/
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
num_items = 3 + map->num_stripes;
free_extent_map(em);
@@ -10515,7 +10589,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
disk_super = fs_info->super_copy;
if (!btrfs_super_root(disk_super))
- return 1;
+ return -EINVAL;
features = btrfs_super_incompat_flags(disk_super);
if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
@@ -10745,3 +10819,23 @@ int btrfs_start_write_no_snapshoting(struct btrfs_root *root)
}
return 1;
}
+
+static int wait_snapshoting_atomic_t(atomic_t *a)
+{
+ schedule();
+ return 0;
+}
+
+void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
+{
+ while (true) {
+ int ret;
+
+ ret = btrfs_start_write_no_snapshoting(root);
+ if (ret)
+ break;
+ wait_on_atomic_t(&root->will_be_snapshoted,
+ wait_snapshoting_atomic_t,
+ TASK_UNINTERRUPTIBLE);
+ }
+}
diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
deleted file mode 100644
index e69de29bb..000000000
--- a/fs/btrfs/extent-tree.h
+++ /dev/null
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 9abe18763..392592dc7 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1285,20 +1285,6 @@ search_again:
}
/* wrappers around set/clear extent bit */
-int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask)
-{
- return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL,
- NULL, mask);
-}
-
-int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, gfp_t mask)
-{
- return set_extent_bit(tree, start, end, bits, NULL,
- NULL, mask);
-}
-
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, gfp_t mask,
struct extent_changeset *changeset)
@@ -1323,17 +1309,6 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
cached, mask, NULL);
}
-int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, gfp_t mask)
-{
- int wake = 0;
-
- if (bits & EXTENT_LOCKED)
- wake = 1;
-
- return clear_extent_bit(tree, start, end, bits, wake, 0, NULL, mask);
-}
-
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, gfp_t mask,
struct extent_changeset *changeset)
@@ -1348,63 +1323,18 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
changeset);
}
-int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached_state, gfp_t mask)
-{
- return set_extent_bit(tree, start, end,
- EXTENT_DELALLOC | EXTENT_UPTODATE,
- NULL, cached_state, mask);
-}
-
-int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached_state, gfp_t mask)
-{
- return set_extent_bit(tree, start, end,
- EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
- NULL, cached_state, mask);
-}
-
-int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask)
-{
- return clear_extent_bit(tree, start, end,
- EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask);
-}
-
-int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask)
-{
- return set_extent_bit(tree, start, end, EXTENT_NEW, NULL,
- NULL, mask);
-}
-
-int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached_state, gfp_t mask)
-{
- return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
- cached_state, mask);
-}
-
-int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached_state, gfp_t mask)
-{
- return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
- cached_state, mask);
-}
-
/*
* either insert or lock state struct between start and end use mask to tell
* us if waiting is desired.
*/
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, struct extent_state **cached_state)
+ struct extent_state **cached_state)
{
int err;
u64 failed_start;
while (1) {
- err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
+ err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
EXTENT_LOCKED, &failed_start,
cached_state, GFP_NOFS, NULL);
if (err == -EEXIST) {
@@ -1417,11 +1347,6 @@ int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
return err;
}
-int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
-{
- return lock_extent_bits(tree, start, end, 0, NULL);
-}
-
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
int err;
@@ -1438,20 +1363,7 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
return 1;
}
-int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached, gfp_t mask)
-{
- return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
- mask);
-}
-
-int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
-{
- return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
- GFP_NOFS);
-}
-
-int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
+void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
{
unsigned long index = start >> PAGE_CACHE_SHIFT;
unsigned long end_index = end >> PAGE_CACHE_SHIFT;
@@ -1464,10 +1376,9 @@ int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
page_cache_release(page);
index++;
}
- return 0;
}
-int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
+void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
{
unsigned long index = start >> PAGE_CACHE_SHIFT;
unsigned long end_index = end >> PAGE_CACHE_SHIFT;
@@ -1481,13 +1392,12 @@ int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
page_cache_release(page);
index++;
}
- return 0;
}
/*
* helper function to set both pages and extents in the tree writeback
*/
-static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
+static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{
unsigned long index = start >> PAGE_CACHE_SHIFT;
unsigned long end_index = end >> PAGE_CACHE_SHIFT;
@@ -1500,7 +1410,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
page_cache_release(page);
index++;
}
- return 0;
}
/* find the first state struct with 'bits' set after 'start', and
@@ -1800,7 +1709,7 @@ again:
BUG_ON(ret); /* Only valid values are 0 and -EAGAIN */
/* step three, lock the state bits for the whole range */
- lock_extent_bits(tree, delalloc_start, delalloc_end, 0, &cached_state);
+ lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state);
/* then test to make sure it is all still delalloc */
ret = test_range_bit(tree, delalloc_start, delalloc_end,
@@ -1820,7 +1729,7 @@ out_failed:
return found;
}
-int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
+void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
struct page *locked_page,
unsigned clear_bits,
unsigned long page_ops)
@@ -1835,7 +1744,7 @@ int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
if (page_ops == 0)
- return 0;
+ return;
if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
mapping_set_error(inode->i_mapping, -EIO);
@@ -1869,7 +1778,6 @@ int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
index += ret;
cond_resched();
}
- return 0;
}
/*
@@ -2516,7 +2424,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
/* lots and lots of room for performance fixes in the end_bio funcs */
-int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
+void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
{
int uptodate = (err == 0);
struct extent_io_tree *tree;
@@ -2537,7 +2445,6 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
ret = ret < 0 ? ret : -EIO;
mapping_set_error(page->mapping, ret);
}
- return 0;
}
/*
@@ -2579,9 +2486,7 @@ static void end_bio_extent_writepage(struct bio *bio)
start = page_offset(page);
end = start + bvec->bv_offset + bvec->bv_len - 1;
- if (end_extent_writepage(page, bio->bi_error, start, end))
- continue;
-
+ end_extent_writepage(page, bio->bi_error, start, end);
end_page_writeback(page);
}
@@ -2992,12 +2897,11 @@ static int __do_readpage(struct extent_io_tree *tree,
struct block_device *bdev;
int ret;
int nr = 0;
- int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
size_t pg_offset = 0;
size_t iosize;
size_t disk_io_size;
size_t blocksize = inode->i_sb->s_blocksize;
- unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
+ unsigned long this_bio_flag = 0;
set_page_extent_mapped(page);
@@ -3037,18 +2941,16 @@ static int __do_readpage(struct extent_io_tree *tree,
kunmap_atomic(userpage);
set_extent_uptodate(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS);
- if (!parent_locked)
- unlock_extent_cached(tree, cur,
- cur + iosize - 1,
- &cached, GFP_NOFS);
+ unlock_extent_cached(tree, cur,
+ cur + iosize - 1,
+ &cached, GFP_NOFS);
break;
}
em = __get_extent_map(inode, page, pg_offset, cur,
end - cur + 1, get_extent, em_cached);
if (IS_ERR_OR_NULL(em)) {
SetPageError(page);
- if (!parent_locked)
- unlock_extent(tree, cur, end);
+ unlock_extent(tree, cur, end);
break;
}
extent_offset = cur - em->start;
@@ -3133,12 +3035,9 @@ static int __do_readpage(struct extent_io_tree *tree,
set_extent_uptodate(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS);
- if (parent_locked)
- free_extent_state(cached);
- else
- unlock_extent_cached(tree, cur,
- cur + iosize - 1,
- &cached, GFP_NOFS);
+ unlock_extent_cached(tree, cur,
+ cur + iosize - 1,
+ &cached, GFP_NOFS);
cur = cur + iosize;
pg_offset += iosize;
continue;
@@ -3147,8 +3046,7 @@ static int __do_readpage(struct extent_io_tree *tree,
if (test_range_bit(tree, cur, cur_end,
EXTENT_UPTODATE, 1, NULL)) {
check_page_uptodate(tree, page);
- if (!parent_locked)
- unlock_extent(tree, cur, cur + iosize - 1);
+ unlock_extent(tree, cur, cur + iosize - 1);
cur = cur + iosize;
pg_offset += iosize;
continue;
@@ -3158,8 +3056,7 @@ static int __do_readpage(struct extent_io_tree *tree,
*/
if (block_start == EXTENT_MAP_INLINE) {
SetPageError(page);
- if (!parent_locked)
- unlock_extent(tree, cur, cur + iosize - 1);
+ unlock_extent(tree, cur, cur + iosize - 1);
cur = cur + iosize;
pg_offset += iosize;
continue;
@@ -3178,8 +3075,7 @@ static int __do_readpage(struct extent_io_tree *tree,
*bio_flags = this_bio_flag;
} else {
SetPageError(page);
- if (!parent_locked)
- unlock_extent(tree, cur, cur + iosize - 1);
+ unlock_extent(tree, cur, cur + iosize - 1);
}
cur = cur + iosize;
pg_offset += iosize;
@@ -3308,20 +3204,6 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
return ret;
}
-int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
- get_extent_t *get_extent, int mirror_num)
-{
- struct bio *bio = NULL;
- unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED;
- int ret;
-
- ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
- &bio_flags, READ, NULL);
- if (bio)
- ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
- return ret;
-}
-
static noinline void update_nr_written(struct page *page,
struct writeback_control *wbc,
unsigned long nr_written)
@@ -4326,7 +4208,7 @@ int extent_invalidatepage(struct extent_io_tree *tree,
if (start > end)
return 0;
- lock_extent_bits(tree, start, end, 0, &cached_state);
+ lock_extent_bits(tree, start, end, &cached_state);
wait_on_page_writeback(page);
clear_extent_bit(tree, start, end,
EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
@@ -4387,7 +4269,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
u64 end = start + PAGE_CACHE_SIZE - 1;
if (gfpflags_allow_blocking(mask) &&
- page->mapping->host->i_size > 16 * 1024 * 1024) {
+ page->mapping->host->i_size > SZ_16M) {
u64 len;
while (start <= end) {
len = end - start + 1;
@@ -4536,7 +4418,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
last_for_get_extent = isize;
}
- lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, 0,
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1,
&cached_state);
em = get_extent_skip_holes(inode, start, last_for_get_extent,
@@ -4797,24 +4679,14 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
return new;
}
-struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
- u64 start)
+struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
+ u64 start, unsigned long len)
{
struct extent_buffer *eb;
- unsigned long len;
unsigned long num_pages;
unsigned long i;
- if (!fs_info) {
- /*
- * Called only from tests that don't always have a fs_info
- * available, but we know that nodesize is 4096
- */
- len = 4096;
- } else {
- len = fs_info->tree_root->nodesize;
- }
- num_pages = num_extent_pages(0, len);
+ num_pages = num_extent_pages(start, len);
eb = __alloc_extent_buffer(fs_info, start, len);
if (!eb)
@@ -4837,6 +4709,24 @@ err:
return NULL;
}
+struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
+ u64 start)
+{
+ unsigned long len;
+
+ if (!fs_info) {
+ /*
+ * Called only from tests that don't always have a fs_info
+ * available, but we know that nodesize is 4096
+ */
+ len = 4096;
+ } else {
+ len = fs_info->tree_root->nodesize;
+ }
+
+ return __alloc_dummy_extent_buffer(fs_info, start, len);
+}
+
static void check_buffer_tree_ref(struct extent_buffer *eb)
{
int refs;
@@ -5227,7 +5117,7 @@ int set_extent_buffer_dirty(struct extent_buffer *eb)
return was_dirty;
}
-int clear_extent_buffer_uptodate(struct extent_buffer *eb)
+void clear_extent_buffer_uptodate(struct extent_buffer *eb)
{
unsigned long i;
struct page *page;
@@ -5240,10 +5130,9 @@ int clear_extent_buffer_uptodate(struct extent_buffer *eb)
if (page)
ClearPageUptodate(page);
}
- return 0;
}
-int set_extent_buffer_uptodate(struct extent_buffer *eb)
+void set_extent_buffer_uptodate(struct extent_buffer *eb)
{
unsigned long i;
struct page *page;
@@ -5255,7 +5144,6 @@ int set_extent_buffer_uptodate(struct extent_buffer *eb)
page = eb->pages[i];
SetPageUptodate(page);
}
- return 0;
}
int extent_buffer_uptodate(struct extent_buffer *eb)
@@ -5594,6 +5482,155 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
}
}
+/*
+ * The extent buffer bitmap operations are done with byte granularity because
+ * bitmap items are not guaranteed to be aligned to a word and therefore a
+ * single word in a bitmap may straddle two pages in the extent buffer.
+ */
+#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
+#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
+#define BITMAP_FIRST_BYTE_MASK(start) \
+ ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
+#define BITMAP_LAST_BYTE_MASK(nbits) \
+ (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
+
+/*
+ * eb_bitmap_offset() - calculate the page and offset of the byte containing the
+ * given bit number
+ * @eb: the extent buffer
+ * @start: offset of the bitmap item in the extent buffer
+ * @nr: bit number
+ * @page_index: return index of the page in the extent buffer that contains the
+ * given bit number
+ * @page_offset: return offset into the page given by page_index
+ *
+ * This helper hides the ugliness of finding the byte in an extent buffer which
+ * contains a given bit.
+ */
+static inline void eb_bitmap_offset(struct extent_buffer *eb,
+ unsigned long start, unsigned long nr,
+ unsigned long *page_index,
+ size_t *page_offset)
+{
+ size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+ size_t byte_offset = BIT_BYTE(nr);
+ size_t offset;
+
+ /*
+ * The byte we want is the offset of the extent buffer + the offset of
+ * the bitmap item in the extent buffer + the offset of the byte in the
+ * bitmap item.
+ */
+ offset = start_offset + start + byte_offset;
+
+ *page_index = offset >> PAGE_CACHE_SHIFT;
+ *page_offset = offset & (PAGE_CACHE_SIZE - 1);
+}
+
+/**
+ * extent_buffer_test_bit - determine whether a bit in a bitmap item is set
+ * @eb: the extent buffer
+ * @start: offset of the bitmap item in the extent buffer
+ * @nr: bit number to test
+ */
+int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
+ unsigned long nr)
+{
+ char *kaddr;
+ struct page *page;
+ unsigned long i;
+ size_t offset;
+
+ eb_bitmap_offset(eb, start, nr, &i, &offset);
+ page = eb->pages[i];
+ WARN_ON(!PageUptodate(page));
+ kaddr = page_address(page);
+ return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
+}
+
+/**
+ * extent_buffer_bitmap_set - set an area of a bitmap
+ * @eb: the extent buffer
+ * @start: offset of the bitmap item in the extent buffer
+ * @pos: bit number of the first bit
+ * @len: number of bits to set
+ */
+void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
+ unsigned long pos, unsigned long len)
+{
+ char *kaddr;
+ struct page *page;
+ unsigned long i;
+ size_t offset;
+ const unsigned int size = pos + len;
+ int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
+ unsigned int mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
+
+ eb_bitmap_offset(eb, start, pos, &i, &offset);
+ page = eb->pages[i];
+ WARN_ON(!PageUptodate(page));
+ kaddr = page_address(page);
+
+ while (len >= bits_to_set) {
+ kaddr[offset] |= mask_to_set;
+ len -= bits_to_set;
+ bits_to_set = BITS_PER_BYTE;
+ mask_to_set = ~0U;
+ if (++offset >= PAGE_CACHE_SIZE && len > 0) {
+ offset = 0;
+ page = eb->pages[++i];
+ WARN_ON(!PageUptodate(page));
+ kaddr = page_address(page);
+ }
+ }
+ if (len) {
+ mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
+ kaddr[offset] |= mask_to_set;
+ }
+}
+
+
+/**
+ * extent_buffer_bitmap_clear - clear an area of a bitmap
+ * @eb: the extent buffer
+ * @start: offset of the bitmap item in the extent buffer
+ * @pos: bit number of the first bit
+ * @len: number of bits to clear
+ */
+void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
+ unsigned long pos, unsigned long len)
+{
+ char *kaddr;
+ struct page *page;
+ unsigned long i;
+ size_t offset;
+ const unsigned int size = pos + len;
+ int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
+ unsigned int mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
+
+ eb_bitmap_offset(eb, start, pos, &i, &offset);
+ page = eb->pages[i];
+ WARN_ON(!PageUptodate(page));
+ kaddr = page_address(page);
+
+ while (len >= bits_to_clear) {
+ kaddr[offset] &= ~mask_to_clear;
+ len -= bits_to_clear;
+ bits_to_clear = BITS_PER_BYTE;
+ mask_to_clear = ~0U;
+ if (++offset >= PAGE_CACHE_SIZE && len > 0) {
+ offset = 0;
+ page = eb->pages[++i];
+ WARN_ON(!PageUptodate(page));
+ kaddr = page_address(page);
+ }
+ }
+ if (len) {
+ mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
+ kaddr[offset] &= ~mask_to_clear;
+ }
+}
+
static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
{
unsigned long distance = (src > dst) ? src - dst : dst - src;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index f4c1ae118..880d5292e 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -29,7 +29,6 @@
*/
#define EXTENT_BIO_COMPRESSED 1
#define EXTENT_BIO_TREE_LOG 2
-#define EXTENT_BIO_PARENT_LOCKED 4
#define EXTENT_BIO_FLAG_SHIFT 16
/* these are bit numbers for test/set bit */
@@ -199,17 +198,17 @@ int try_release_extent_mapping(struct extent_map_tree *map,
struct extent_io_tree *tree, struct page *page,
gfp_t mask);
int try_release_extent_buffer(struct page *page);
-int lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, struct extent_state **cached);
-int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end);
-int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached, gfp_t mask);
+ struct extent_state **cached);
+
+static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
+{
+ return lock_extent_bits(tree, start, end, NULL);
+}
+
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent, int mirror_num);
-int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
- get_extent_t *get_extent, int mirror_num);
int __init extent_io_init(void);
void extent_io_exit(void);
@@ -221,39 +220,105 @@ void free_extent_state(struct extent_state *state);
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int filled,
struct extent_state *cached_state);
-int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, gfp_t mask);
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, gfp_t mask,
struct extent_changeset *changeset);
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete,
struct extent_state **cached, gfp_t mask);
-int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, gfp_t mask);
+
+/*
+ * Clear EXTENT_LOCKED on the range through clear_extent_bit() with wake = 1,
+ * delete = 0, no cached state and a GFP_NOFS allocation mask.
+ */
+static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
+{
+ return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
+ GFP_NOFS);
+}
+
+/*
+ * Same as unlock_extent(), except the caller supplies the cached extent_state
+ * pointer and the allocation mask passed on to clear_extent_bit().
+ */
+static inline int unlock_extent_cached(struct extent_io_tree *tree, u64 start,
+ u64 end, struct extent_state **cached, gfp_t mask)
+{
+ return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
+ mask);
+}
+
+/*
+ * Clear @bits on the range through clear_extent_bit() with delete = 0 and no
+ * cached state.  The wake argument is 1 only when EXTENT_LOCKED is among the
+ * bits being cleared, otherwise 0.
+ */
+static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
+				    u64 end, unsigned bits, gfp_t mask)
+{
+	const int wake = (bits & EXTENT_LOCKED) ? 1 : 0;
+
+	return clear_extent_bit(tree, start, end, bits, wake, 0, NULL, mask);
+}
+
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, gfp_t mask,
struct extent_changeset *changeset);
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, u64 *failed_start,
struct extent_state **cached_state, gfp_t mask);
-int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached_state, gfp_t mask);
-int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached_state, gfp_t mask);
-int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask);
-int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask);
-int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask);
+
+/*
+ * Set @bits on the range through set_extent_bit(), passing NULL for both
+ * failed_start and cached_state.
+ */
+static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
+ u64 end, unsigned bits, gfp_t mask)
+{
+ return set_extent_bit(tree, start, end, bits, NULL, NULL, mask);
+}
+
+/*
+ * Clear EXTENT_UPTODATE on the range through clear_extent_bit() with
+ * wake = 0 and delete = 0, using the caller's cached_state and mask.
+ */
+static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
+ u64 end, struct extent_state **cached_state, gfp_t mask)
+{
+ return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
+ cached_state, mask);
+}
+
+/*
+ * Set EXTENT_DIRTY on the range through set_extent_bit(), with no
+ * failed_start and no cached state.
+ */
+static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start,
+ u64 end, gfp_t mask)
+{
+ return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL,
+ NULL, mask);
+}
+
+/*
+ * Clear EXTENT_DIRTY, EXTENT_DELALLOC and EXTENT_DO_ACCOUNTING together on
+ * the range through clear_extent_bit(), with wake = 0, delete = 0 and no
+ * cached state.
+ */
+static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
+ u64 end, gfp_t mask)
+{
+ return clear_extent_bit(tree, start, end,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask);
+}
+
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, unsigned clear_bits,
struct extent_state **cached_state, gfp_t mask);
-int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached_state, gfp_t mask);
-int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached_state, gfp_t mask);
+
+/*
+ * Set EXTENT_DELALLOC | EXTENT_UPTODATE on the range through
+ * set_extent_bit(), with no failed_start and the caller's cached_state.
+ */
+static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start,
+ u64 end, struct extent_state **cached_state, gfp_t mask)
+{
+ return set_extent_bit(tree, start, end,
+ EXTENT_DELALLOC | EXTENT_UPTODATE,
+ NULL, cached_state, mask);
+}
+
+/*
+ * Set EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG on the range through
+ * set_extent_bit(), with no failed_start and the caller's cached_state.
+ */
+static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start,
+ u64 end, struct extent_state **cached_state, gfp_t mask)
+{
+ return set_extent_bit(tree, start, end,
+ EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
+ NULL, cached_state, mask);
+}
+
+/*
+ * Set EXTENT_NEW on the range through set_extent_bit(), with no failed_start
+ * and no cached state.
+ */
+static inline int set_extent_new(struct extent_io_tree *tree, u64 start,
+ u64 end, gfp_t mask)
+{
+ return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, NULL, mask);
+}
+
+/*
+ * Set EXTENT_UPTODATE on the range through set_extent_bit(), with no
+ * failed_start and the caller's cached_state.
+ */
+static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start,
+ u64 end, struct extent_state **cached_state, gfp_t mask)
+{
+ return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
+ cached_state, mask);
+}
+
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, unsigned bits,
struct extent_state **cached_state);
@@ -282,8 +347,10 @@ void set_page_extent_mapped(struct page *page);
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start);
+struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
+ u64 start, unsigned long len);
struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
- u64 start);
+ u64 start);
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start);
@@ -328,19 +395,25 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
unsigned long src_offset, unsigned long len);
void memset_extent_buffer(struct extent_buffer *eb, char c,
unsigned long start, unsigned long len);
+int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
+ unsigned long pos);
+void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
+ unsigned long pos, unsigned long len);
+void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
+ unsigned long pos, unsigned long len);
void clear_extent_buffer_dirty(struct extent_buffer *eb);
int set_extent_buffer_dirty(struct extent_buffer *eb);
-int set_extent_buffer_uptodate(struct extent_buffer *eb);
-int clear_extent_buffer_uptodate(struct extent_buffer *eb);
+void set_extent_buffer_uptodate(struct extent_buffer *eb);
+void clear_extent_buffer_uptodate(struct extent_buffer *eb);
int extent_buffer_uptodate(struct extent_buffer *eb);
int extent_buffer_under_io(struct extent_buffer *eb);
int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
unsigned long min_len, char **map,
unsigned long *map_start,
unsigned long *map_len);
-int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
-int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
-int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
+void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
+void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
+void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
struct page *locked_page,
unsigned bits_to_clear,
unsigned long page_ops);
@@ -357,7 +430,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
int mirror_num);
int clean_io_failure(struct inode *inode, u64 start, struct page *page,
unsigned int pg_offset);
-int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
+void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
int mirror_num);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 6a98bddd8..84fb56d5c 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -76,7 +76,7 @@ void free_extent_map(struct extent_map *em)
WARN_ON(extent_map_in_tree(em));
WARN_ON(!list_empty(&em->list));
if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
- kfree(em->bdev);
+ kfree(em->map_lookup);
kmem_cache_free(extent_map_cache, em);
}
}
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index b2991fd85..eb8b8fae0 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -32,7 +32,15 @@ struct extent_map {
u64 block_len;
u64 generation;
unsigned long flags;
- struct block_device *bdev;
+ union {
+ struct block_device *bdev;
+
+ /*
+ * used for chunk mappings
+ * flags & EXTENT_FLAG_FS_MAPPING must be set
+ */
+ struct map_lookup *map_lookup;
+ };
atomic_t refs;
unsigned int compress_type;
struct list_head list;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 58ece6558..a67e1c828 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -202,7 +202,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
}
if (bio->bi_iter.bi_size > PAGE_CACHE_SIZE * 8)
- path->reada = 2;
+ path->reada = READA_FORWARD;
WARN_ON(bio->bi_vcnt <= 0);
@@ -328,7 +328,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
if (search_commit) {
path->skip_locking = 1;
- path->reada = 2;
+ path->reada = READA_FORWARD;
path->search_commit_root = 1;
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 0f09526aa..098bb8f69 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -406,8 +406,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
/* simple helper to fault in pages and copy. This should go away
* and be replaced with calls into generic code.
*/
-static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
- size_t write_bytes,
+static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
struct page **prepared_pages,
struct iov_iter *i)
{
@@ -1394,7 +1393,7 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
if (start_pos < inode->i_size) {
struct btrfs_ordered_extent *ordered;
lock_extent_bits(&BTRFS_I(inode)->io_tree,
- start_pos, last_pos, 0, cached_state);
+ start_pos, last_pos, cached_state);
ordered = btrfs_lookup_ordered_range(inode, start_pos,
last_pos - start_pos + 1);
if (ordered &&
@@ -1588,8 +1587,7 @@ again:
ret = 0;
}
- copied = btrfs_copy_from_user(pos, num_pages,
- write_bytes, pages, i);
+ copied = btrfs_copy_from_user(pos, write_bytes, pages, i);
/*
* if we have trouble faulting in the pages, fall
@@ -1764,17 +1762,17 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
loff_t pos;
size_t count;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = generic_write_checks(iocb, from);
if (err <= 0) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
current->backing_dev_info = inode_to_bdi(inode);
err = file_remove_privs(file);
if (err) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
goto out;
}
@@ -1785,7 +1783,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
* to stop this write operation to ensure FS consistency.
*/
if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
err = -EROFS;
goto out;
}
@@ -1806,7 +1804,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
end_pos = round_up(pos + count, root->sectorsize);
err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
if (err) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
goto out;
}
}
@@ -1822,7 +1820,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
iocb->ki_pos = pos + num_written;
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/*
* We also have to set last_sub_trans to the current log transid,
@@ -1911,7 +1909,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
atomic_inc(&root->log_batch);
full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
@@ -1963,7 +1961,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
ret = start_ordered_ops(inode, start, end);
}
if (ret) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
goto out;
}
atomic_inc(&root->log_batch);
@@ -2009,7 +2007,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
*/
clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
goto out;
}
@@ -2033,7 +2031,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
goto out;
}
trans->sync = true;
@@ -2056,7 +2054,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* file again, but that will end up using the synchronization
* inside btrfs_sync_log to keep things safe.
*/
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/*
* If any of the ordered extents had an error, just return it to user
@@ -2305,7 +2303,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
ret = find_first_non_hole(inode, &offset, &len);
if (ret < 0)
@@ -2345,7 +2343,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
truncated_page = true;
ret = btrfs_truncate_page(inode, offset, 0, 0);
if (ret) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
}
@@ -2398,7 +2396,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
truncate_pagecache_range(inode, lockstart, lockend);
lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- 0, &cached_state);
+ &cached_state);
ordered = btrfs_lookup_first_ordered_extent(inode, lockend);
/*
@@ -2421,7 +2419,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
ret = btrfs_wait_ordered_range(inode, lockstart,
lockend - lockstart + 1);
if (ret) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
}
@@ -2576,7 +2574,7 @@ out_only_mutex:
ret = btrfs_end_transaction(trans, root);
}
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (ret && !err)
err = ret;
return err;
@@ -2660,7 +2658,7 @@ static long btrfs_fallocate(struct file *file, int mode,
if (ret < 0)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = inode_newsize_ok(inode, alloc_end);
if (ret)
goto out;
@@ -2705,7 +2703,7 @@ static long btrfs_fallocate(struct file *file, int mode,
* transaction
*/
lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
- locked_end, 0, &cached_state);
+ locked_end, &cached_state);
ordered = btrfs_lookup_first_ordered_extent(inode,
alloc_end - 1);
if (ordered &&
@@ -2818,7 +2816,7 @@ out:
* So this is completely used as cleanup.
*/
btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/* Let go of our reservation. */
btrfs_free_reserved_data_space(inode, alloc_start,
alloc_end - alloc_start);
@@ -2852,7 +2850,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
lockend--;
len = lockend - lockstart + 1;
- lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state);
while (start < inode->i_size) {
@@ -2894,7 +2892,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
struct inode *inode = file->f_mapping->host;
int ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
switch (whence) {
case SEEK_END:
case SEEK_CUR:
@@ -2903,20 +2901,20 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
case SEEK_DATA:
case SEEK_HOLE:
if (offset >= i_size_read(inode)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return -ENXIO;
}
ret = find_desired_extent(inode, &offset, whence);
if (ret) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
}
offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return offset;
}
@@ -2934,6 +2932,9 @@ const struct file_operations btrfs_file_operations = {
#ifdef CONFIG_COMPAT
.compat_ioctl = btrfs_ioctl,
#endif
+ .copy_file_range = btrfs_copy_file_range,
+ .clone_file_range = btrfs_clone_file_range,
+ .dedupe_file_range = btrfs_dedupe_file_range,
};
void btrfs_auto_defrag_exit(void)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index cfe99bec4..8f835bfa1 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -30,7 +30,7 @@
#include "volumes.h"
#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
-#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
+#define MAX_CACHE_BYTES_PER_GIG SZ_32K
struct btrfs_trim_range {
u64 start;
@@ -1086,14 +1086,11 @@ write_pinned_extent_entries(struct btrfs_root *root,
static noinline_for_stack int
write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list)
{
- struct list_head *pos, *n;
+ struct btrfs_free_space *entry, *next;
int ret;
/* Write out the bitmaps */
- list_for_each_safe(pos, n, bitmap_list) {
- struct btrfs_free_space *entry =
- list_entry(pos, struct btrfs_free_space, list);
-
+ list_for_each_entry_safe(entry, next, bitmap_list, list) {
ret = io_ctl_add_bitmap(io_ctl, entry->bitmap);
if (ret)
return -ENOSPC;
@@ -1119,13 +1116,10 @@ static int flush_dirty_cache(struct inode *inode)
static void noinline_for_stack
cleanup_bitmap_list(struct list_head *bitmap_list)
{
- struct list_head *pos, *n;
+ struct btrfs_free_space *entry, *next;
- list_for_each_safe(pos, n, bitmap_list) {
- struct btrfs_free_space *entry =
- list_entry(pos, struct btrfs_free_space, list);
+ list_for_each_entry_safe(entry, next, bitmap_list, list)
list_del_init(&entry->list);
- }
}
static void noinline_for_stack
@@ -1261,7 +1255,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
goto out;
lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
- 0, &cached_state);
+ &cached_state);
io_ctl_set_generation(io_ctl, trans->transid);
@@ -1656,11 +1650,10 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
* at or below 32k, so we need to adjust how much memory we allow to be
* used by extent based free space tracking
*/
- if (size < 1024 * 1024 * 1024)
+ if (size < SZ_1G)
max_bytes = MAX_CACHE_BYTES_PER_GIG;
else
- max_bytes = MAX_CACHE_BYTES_PER_GIG *
- div_u64(size, 1024 * 1024 * 1024);
+ max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G);
/*
* we want to account for 1 more bitmap than what we have so we can make
@@ -2016,7 +2009,7 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
return true;
}
-static struct btrfs_free_space_op free_space_op = {
+static const struct btrfs_free_space_op free_space_op = {
.recalc_thresholds = recalculate_thresholds,
.use_bitmap = use_bitmap,
};
@@ -2489,8 +2482,7 @@ void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
* track of free space, and if we pass 1/2 of that we want to
* start converting things over to using bitmaps
*/
- ctl->extents_thresh = ((1024 * 32) / 2) /
- sizeof(struct btrfs_free_space);
+ ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space);
}
/*
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index f251865eb..33178c490 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -37,7 +37,7 @@ struct btrfs_free_space_ctl {
int total_bitmaps;
int unit;
u64 start;
- struct btrfs_free_space_op *op;
+ const struct btrfs_free_space_op *op;
void *private;
struct mutex cache_writeout_mutex;
struct list_head trimming_ranges;
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
new file mode 100644
index 000000000..53dbeaf6c
--- /dev/null
+++ b/fs/btrfs/free-space-tree.c
@@ -0,0 +1,1605 @@
+/*
+ * Copyright (C) 2015 Facebook. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/vmalloc.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "locking.h"
+#include "free-space-tree.h"
+#include "transaction.h"
+
+static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path);
+
+/*
+ * Compute the extent-count thresholds at which a block group's free space
+ * tree entries switch between the extent format and the bitmap format.
+ *
+ * We convert to bitmaps once the disk space needed for extent items exceeds
+ * what bitmap items covering the whole block group would need, so the high
+ * threshold is that bitmap footprint divided by sizeof(struct btrfs_item).
+ * The low threshold sits 100 below it (or at 0) so that a block group does
+ * not thrash back and forth between the two formats.
+ */
+void set_free_space_tree_thresholds(struct btrfs_block_group_cache *cache)
+{
+	const u32 range = cache->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
+	const size_t item_size = sizeof(struct btrfs_item) +
+				 BTRFS_FREE_SPACE_BITMAP_SIZE;
+	u64 nr_bitmaps, total_size;
+
+	/* Bitmap items needed to cover the block group, rounded up. */
+	nr_bitmaps = div_u64(cache->key.offset + range - 1, range);
+	total_size = nr_bitmaps * item_size;
+
+	cache->bitmap_high_thresh = div_u64(total_size,
+					    sizeof(struct btrfs_item));
+	cache->bitmap_low_thresh = cache->bitmap_high_thresh > 100 ?
+				   cache->bitmap_high_thresh - 100 : 0;
+}
+
+/*
+ * Insert a fresh BTRFS_FREE_SPACE_INFO_KEY item for @block_group into the
+ * free space tree, with both its extent count and its flags set to 0.
+ *
+ * The path is released before returning on every exit.  Returns 0 on
+ * success, otherwise the result of btrfs_insert_empty_item().
+ */
+static int add_new_free_space_info(struct btrfs_trans_handle *trans,
+				   struct btrfs_fs_info *fs_info,
+				   struct btrfs_block_group_cache *block_group,
+				   struct btrfs_path *path)
+{
+	struct btrfs_root *root = fs_info->free_space_root;
+	struct btrfs_free_space_info *info;
+	struct extent_buffer *leaf;
+	struct btrfs_key key;
+	int ret;
+
+	key.objectid = block_group->key.objectid;
+	key.type = BTRFS_FREE_SPACE_INFO_KEY;
+	key.offset = block_group->key.offset;
+
+	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
+	if (!ret) {
+		/* Item exists now; initialize it as empty, extent-based. */
+		leaf = path->nodes[0];
+		info = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_free_space_info);
+		btrfs_set_free_space_extent_count(leaf, info, 0);
+		btrfs_set_free_space_flags(leaf, info, 0);
+		btrfs_mark_buffer_dirty(leaf);
+	}
+
+	btrfs_release_path(path);
+	return ret;
+}
+
+/*
+ * Look up the BTRFS_FREE_SPACE_INFO_KEY item of @block_group in the free
+ * space tree.
+ *
+ * @cow is handed straight to btrfs_search_slot().  On success the returned
+ * pointer refers to the item at path->slots[0] of path->nodes[0]; the path is
+ * left populated and is not released here.
+ *
+ * Returns ERR_PTR(ret) when the search itself fails, or ERR_PTR(-ENOENT)
+ * (after a warning and ASSERT(0)) when no exactly matching item exists.
+ */
+struct btrfs_free_space_info *
+search_free_space_info(struct btrfs_trans_handle *trans,
+		       struct btrfs_fs_info *fs_info,
+		       struct btrfs_block_group_cache *block_group,
+		       struct btrfs_path *path, int cow)
+{
+	struct btrfs_root *root = fs_info->free_space_root;
+	struct btrfs_key key;
+	int ret;
+
+	key.objectid = block_group->key.objectid;
+	key.type = BTRFS_FREE_SPACE_INFO_KEY;
+	key.offset = block_group->key.offset;
+
+	ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	if (ret != 0) {
+		/*
+		 * No trailing newline, matching the btrfs_err() messages
+		 * elsewhere in this file.
+		 */
+		btrfs_warn(fs_info, "missing free space info for %llu",
+			   block_group->key.objectid);
+		ASSERT(0);
+		return ERR_PTR(-ENOENT);
+	}
+
+	return btrfs_item_ptr(path->nodes[0], path->slots[0],
+			      struct btrfs_free_space_info);
+}
+
+/*
+ * Variant of btrfs_search_slot() that leaves the path on the item with the
+ * greatest key less than the passed key.
+ *
+ * An exact match for @key, or a search that lands on slot 0 of the leaf, is
+ * treated as unexpected: ASSERT(0) fires and -EIO is returned.  Negative
+ * results from btrfs_search_slot() are passed through; 0 means success.
+ */
+static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root,
+				  struct btrfs_key *key, struct btrfs_path *p,
+				  int ins_len, int cow)
+{
+	int ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
+
+	if (ret < 0)
+		return ret;
+
+	/* Need a miss, and at least one item in front of where we landed. */
+	if (ret == 0 || p->slots[0] == 0) {
+		ASSERT(0);
+		return -EIO;
+	}
+
+	p->slots[0]--;
+	return 0;
+}
+
+/*
+ * Bytes needed for a bitmap holding one bit per @sectorsize unit of @size:
+ * size / sectorsize bits (truncated to u32), rounded up to whole bytes.
+ */
+static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
+{
+	u32 nr_bits = (u32)div_u64(size, sectorsize);
+
+	return DIV_ROUND_UP(nr_bits, BITS_PER_BYTE);
+}
+
+/*
+ * Allocate a zeroed buffer of @bitmap_size bytes, or return NULL.
+ *
+ * The allocation size varies (observed numbers were < 4K up to 16K), so
+ * using vmalloc unconditionally would be too heavy.  Sizes up to PAGE_SIZE
+ * use a plain kzalloc(); larger ones first try kzalloc() with __GFP_NOWARN
+ * and only fall back to __vmalloc() if that fails.
+ */
+static unsigned long *alloc_bitmap(u32 bitmap_size)
+{
+	void *mem;
+
+	if (bitmap_size > PAGE_SIZE) {
+		mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN);
+		if (!mem)
+			mem = __vmalloc(bitmap_size,
+					GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO,
+					PAGE_KERNEL);
+	} else {
+		mem = kzalloc(bitmap_size, GFP_NOFS);
+	}
+
+	return mem;
+}
+
+/*
+ * Convert the free space tree entries of @block_group from extent items to
+ * bitmap items.
+ *
+ * Three phases:
+ *  1. Walk backwards from the end of the block group, recording every
+ *     BTRFS_FREE_SPACE_EXTENT_KEY item in an in-memory bitmap and deleting
+ *     it, until the BTRFS_FREE_SPACE_INFO_KEY item is reached.
+ *  2. Set BTRFS_FREE_SPACE_USING_BITMAPS in the info item and check that the
+ *     number of extents seen matches the count stored there.
+ *  3. Insert BTRFS_FREE_SPACE_BITMAP_KEY items holding the bitmap contents.
+ *
+ * Returns 0 on success.  On any failure the transaction is aborted and the
+ * error is returned.
+ */
+int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path)
+{
+ struct btrfs_root *root = fs_info->free_space_root;
+ struct btrfs_free_space_info *info;
+ struct btrfs_key key, found_key;
+ struct extent_buffer *leaf;
+ unsigned long *bitmap;
+ char *bitmap_cursor;
+ u64 start, end;
+ u64 bitmap_range, i;
+ u32 bitmap_size, flags, expected_extent_count;
+ u32 extent_count = 0;
+ int done = 0, nr;
+ int ret;
+
+ /* One bit per sectorsize unit of the whole block group. */
+ bitmap_size = free_space_bitmap_size(block_group->key.offset,
+ block_group->sectorsize);
+ bitmap = alloc_bitmap(bitmap_size);
+ if (!bitmap) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ start = block_group->key.objectid;
+ end = block_group->key.objectid + block_group->key.offset;
+
+ /* Largest possible key whose objectid is still inside the block group. */
+ key.objectid = end - 1;
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+
+ while (!done) {
+ ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+ if (ret)
+ goto out;
+
+ leaf = path->nodes[0];
+ nr = 0;
+ /*
+ * Step one past the slot found so the loop below can always
+ * look at slots[0] - 1 and stop once slots[0] reaches 0.
+ */
+ path->slots[0]++;
+ while (path->slots[0] > 0) {
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
+
+ if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
+ ASSERT(found_key.objectid == block_group->key.objectid);
+ ASSERT(found_key.offset == block_group->key.offset);
+ done = 1;
+ break;
+ } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
+ u64 first, last;
+
+ ASSERT(found_key.objectid >= start);
+ ASSERT(found_key.objectid < end);
+ ASSERT(found_key.objectid + found_key.offset <= end);
+
+ /* Sector indexes of the extent relative to the block group start. */
+ first = div_u64(found_key.objectid - start,
+ block_group->sectorsize);
+ last = div_u64(found_key.objectid + found_key.offset - start,
+ block_group->sectorsize);
+ bitmap_set(bitmap, first, last - first);
+
+ extent_count++;
+ nr++;
+ path->slots[0]--;
+ } else {
+ /*
+ * NOTE(review): slots[0] is not advanced on this
+ * branch, so if ASSERT() compiles to a no-op the
+ * loop would appear to spin on the same item -
+ * confirm.
+ */
+ ASSERT(0);
+ }
+ }
+
+ /* slots[0] is now the lowest extent item visited in this leaf. */
+ ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
+ if (ret)
+ goto out;
+ btrfs_release_path(path);
+ }
+
+ info = search_free_space_info(trans, fs_info, block_group, path, 1);
+ if (IS_ERR(info)) {
+ ret = PTR_ERR(info);
+ goto out;
+ }
+ leaf = path->nodes[0];
+ flags = btrfs_free_space_flags(leaf, info);
+ flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
+ btrfs_set_free_space_flags(leaf, info, flags);
+ expected_extent_count = btrfs_free_space_extent_count(leaf, info);
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(path);
+
+ if (extent_count != expected_extent_count) {
+ btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u",
+ block_group->key.objectid, extent_count,
+ expected_extent_count);
+ ASSERT(0);
+ ret = -EIO;
+ goto out;
+ }
+
+ /*
+ * Write the in-memory bitmap out as bitmap items, each covering at
+ * most bitmap_range bytes of the block group.
+ */
+ bitmap_cursor = (char *)bitmap;
+ bitmap_range = block_group->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
+ i = start;
+ while (i < end) {
+ unsigned long ptr;
+ u64 extent_size;
+ u32 data_size;
+
+ extent_size = min(end - i, bitmap_range);
+ data_size = free_space_bitmap_size(extent_size,
+ block_group->sectorsize);
+
+ key.objectid = i;
+ key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
+ key.offset = extent_size;
+
+ ret = btrfs_insert_empty_item(trans, root, path, &key,
+ data_size);
+ if (ret)
+ goto out;
+
+ leaf = path->nodes[0];
+ ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ write_extent_buffer(leaf, bitmap_cursor, ptr,
+ data_size);
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(path);
+
+ i += extent_size;
+ bitmap_cursor += data_size;
+ }
+
+ ret = 0;
+out:
+ /* bitmap may be NULL here if the allocation failed. */
+ kvfree(bitmap);
+ if (ret)
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+}
+
+/*
+ * Convert the free space tree entries of @block_group from bitmap items back
+ * to extent items.
+ *
+ * Three phases:
+ *  1. Walk backwards from the end of the block group, copying every
+ *     BTRFS_FREE_SPACE_BITMAP_KEY item into an in-memory bitmap and deleting
+ *     it, until the BTRFS_FREE_SPACE_INFO_KEY item is reached.
+ *  2. Clear BTRFS_FREE_SPACE_USING_BITMAPS in the info item.
+ *  3. Scan the bitmap one sector at a time and insert one
+ *     BTRFS_FREE_SPACE_EXTENT_KEY item per run of set bits, then check the
+ *     number of runs against the count stored in the info item.
+ *
+ * Returns 0 on success.  On any failure the transaction is aborted and the
+ * error is returned.
+ */
+int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path)
+{
+ struct btrfs_root *root = fs_info->free_space_root;
+ struct btrfs_free_space_info *info;
+ struct btrfs_key key, found_key;
+ struct extent_buffer *leaf;
+ unsigned long *bitmap;
+ u64 start, end;
+ /* Initialize to silence GCC. */
+ u64 extent_start = 0;
+ u64 offset;
+ u32 bitmap_size, flags, expected_extent_count;
+ int prev_bit = 0, bit, bitnr;
+ u32 extent_count = 0;
+ int done = 0, nr;
+ int ret;
+
+ /* One bit per sectorsize unit of the whole block group. */
+ bitmap_size = free_space_bitmap_size(block_group->key.offset,
+ block_group->sectorsize);
+ bitmap = alloc_bitmap(bitmap_size);
+ if (!bitmap) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ start = block_group->key.objectid;
+ end = block_group->key.objectid + block_group->key.offset;
+
+ /* Largest possible key whose objectid is still inside the block group. */
+ key.objectid = end - 1;
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+
+ while (!done) {
+ ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+ if (ret)
+ goto out;
+
+ leaf = path->nodes[0];
+ nr = 0;
+ /*
+ * Step one past the slot found so the loop below can always
+ * look at slots[0] - 1 and stop once slots[0] reaches 0.
+ */
+ path->slots[0]++;
+ while (path->slots[0] > 0) {
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
+
+ if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
+ ASSERT(found_key.objectid == block_group->key.objectid);
+ ASSERT(found_key.offset == block_group->key.offset);
+ done = 1;
+ break;
+ } else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
+ unsigned long ptr;
+ char *bitmap_cursor;
+ u32 bitmap_pos, data_size;
+
+ ASSERT(found_key.objectid >= start);
+ ASSERT(found_key.objectid < end);
+ ASSERT(found_key.objectid + found_key.offset <= end);
+
+ /* Byte offset of this item's bits inside the in-memory bitmap. */
+ bitmap_pos = div_u64(found_key.objectid - start,
+ block_group->sectorsize *
+ BITS_PER_BYTE);
+ bitmap_cursor = ((char *)bitmap) + bitmap_pos;
+ data_size = free_space_bitmap_size(found_key.offset,
+ block_group->sectorsize);
+
+ ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
+ read_extent_buffer(leaf, bitmap_cursor, ptr,
+ data_size);
+
+ nr++;
+ path->slots[0]--;
+ } else {
+ /*
+ * NOTE(review): slots[0] is not advanced on this
+ * branch, so if ASSERT() compiles to a no-op the
+ * loop would appear to spin on the same item -
+ * confirm.
+ */
+ ASSERT(0);
+ }
+ }
+
+ /* slots[0] is now the lowest bitmap item visited in this leaf. */
+ ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
+ if (ret)
+ goto out;
+ btrfs_release_path(path);
+ }
+
+ info = search_free_space_info(trans, fs_info, block_group, path, 1);
+ if (IS_ERR(info)) {
+ ret = PTR_ERR(info);
+ goto out;
+ }
+ leaf = path->nodes[0];
+ flags = btrfs_free_space_flags(leaf, info);
+ flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
+ btrfs_set_free_space_flags(leaf, info, flags);
+ expected_extent_count = btrfs_free_space_extent_count(leaf, info);
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(path);
+
+ /*
+ * Scan sector by sector: a 0 -> 1 transition opens an extent, a
+ * 1 -> 0 transition closes it and inserts the extent item.
+ */
+ offset = start;
+ bitnr = 0;
+ while (offset < end) {
+ bit = !!test_bit(bitnr, bitmap);
+ if (prev_bit == 0 && bit == 1) {
+ extent_start = offset;
+ } else if (prev_bit == 1 && bit == 0) {
+ key.objectid = extent_start;
+ key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
+ key.offset = offset - extent_start;
+
+ ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+ if (ret)
+ goto out;
+ btrfs_release_path(path);
+
+ extent_count++;
+ }
+ prev_bit = bit;
+ offset += block_group->sectorsize;
+ bitnr++;
+ }
+ /* An extent still open at the end runs to the end of the block group. */
+ if (prev_bit == 1) {
+ key.objectid = extent_start;
+ key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
+ key.offset = end - extent_start;
+
+ ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+ if (ret)
+ goto out;
+ btrfs_release_path(path);
+
+ extent_count++;
+ }
+
+ if (extent_count != expected_extent_count) {
+ btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u",
+ block_group->key.objectid, extent_count,
+ expected_extent_count);
+ ASSERT(0);
+ ret = -EIO;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ /* bitmap may be NULL here if the allocation failed. */
+ kvfree(bitmap);
+ if (ret)
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+}
+
+/*
+ * Add @new_extents (which may be negative) to the extent count stored in the
+ * block group's free space info item, then switch the on-disk format if the
+ * new count crosses a threshold: to bitmaps when extents are in use and the
+ * count exceeds bitmap_high_thresh, to extents when bitmaps are in use and
+ * the count drops below bitmap_low_thresh.
+ *
+ * Returns 0 when nothing needs doing or on success, the error from
+ * search_free_space_info(), or the result of the conversion.
+ */
+static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
+					  struct btrfs_fs_info *fs_info,
+					  struct btrfs_block_group_cache *block_group,
+					  struct btrfs_path *path,
+					  int new_extents)
+{
+	struct btrfs_free_space_info *info;
+	struct extent_buffer *leaf;
+	bool using_bitmaps;
+	u32 extent_count;
+
+	if (!new_extents)
+		return 0;
+
+	info = search_free_space_info(trans, fs_info, block_group, path, 1);
+	if (IS_ERR(info))
+		return PTR_ERR(info);
+
+	leaf = path->nodes[0];
+	using_bitmaps = btrfs_free_space_flags(leaf, info) &
+			BTRFS_FREE_SPACE_USING_BITMAPS;
+	extent_count = btrfs_free_space_extent_count(leaf, info) + new_extents;
+
+	btrfs_set_free_space_extent_count(leaf, info, extent_count);
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_release_path(path);
+
+	if (using_bitmaps) {
+		if (extent_count < block_group->bitmap_low_thresh)
+			return convert_free_space_to_extents(trans, fs_info,
+							     block_group, path);
+	} else if (extent_count > block_group->bitmap_high_thresh) {
+		return convert_free_space_to_bitmaps(trans, fs_info,
+						     block_group, path);
+	}
+
+	return 0;
+}
+
+/*
+ * Test the bit describing the block at @offset in the free space bitmap item
+ * that @path currently points to. The item must be a bitmap item covering
+ * @offset (both are asserted).
+ *
+ * Returns 1 if the bit is set and 0 if it is clear.
+ */
+int free_space_test_bit(struct btrfs_block_group_cache *block_group,
+			struct btrfs_path *path, u64 offset)
+{
+	struct extent_buffer *eb = path->nodes[0];
+	int slot = path->slots[0];
+	struct btrfs_key bitmap_key;
+	unsigned long item_off, bitnr;
+
+	btrfs_item_key_to_cpu(eb, &bitmap_key, slot);
+	ASSERT(bitmap_key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
+	ASSERT(offset >= bitmap_key.objectid &&
+	       offset < bitmap_key.objectid + bitmap_key.offset);
+
+	item_off = btrfs_item_ptr_offset(eb, slot);
+	/* One bit per sector, counted from the start of this bitmap item. */
+	bitnr = div_u64(offset - bitmap_key.objectid, block_group->sectorsize);
+
+	return !!extent_buffer_test_bit(eb, item_off, bitnr);
+}
+
+/*
+ * Set (bit != 0) or clear (bit == 0) the bits for the part of
+ * [*start, *start + *size) that falls inside the bitmap item @path points to.
+ *
+ * On return *start is advanced past the processed part and *size is reduced by
+ * the same amount, so the caller can continue with the next bitmap item until
+ * *size reaches zero.
+ */
+static void free_space_set_bits(struct btrfs_block_group_cache *block_group,
+				struct btrfs_path *path, u64 *start, u64 *size,
+				int bit)
+{
+	struct extent_buffer *eb = path->nodes[0];
+	int slot = path->slots[0];
+	struct btrfs_key bitmap_key;
+	u64 bitmap_start, bitmap_end;
+	u64 range_end = *start + *size;
+	unsigned long item_off, first_bit, end_bit;
+
+	btrfs_item_key_to_cpu(eb, &bitmap_key, slot);
+	ASSERT(bitmap_key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
+
+	bitmap_start = bitmap_key.objectid;
+	bitmap_end = bitmap_start + bitmap_key.offset;
+	ASSERT(*start >= bitmap_start && *start < bitmap_end);
+	ASSERT(range_end > bitmap_start);
+
+	/* Only touch the portion of the range this bitmap item covers. */
+	if (range_end > bitmap_end)
+		range_end = bitmap_end;
+
+	item_off = btrfs_item_ptr_offset(eb, slot);
+	first_bit = div_u64(*start - bitmap_start, block_group->sectorsize);
+	end_bit = div_u64(range_end - bitmap_start, block_group->sectorsize);
+
+	if (bit)
+		extent_buffer_bitmap_set(eb, item_off, first_bit,
+					 end_bit - first_bit);
+	else
+		extent_buffer_bitmap_clear(eb, item_off, first_bit,
+					   end_bit - first_bit);
+	btrfs_mark_buffer_dirty(eb);
+
+	/* Report back how much of the range is still left to process. */
+	*size -= range_end - *start;
+	*start = range_end;
+}
+
+/*
+ * Advance @p to the next item, for use by modify_free_space_bitmap().
+ *
+ * btrfs_next_item() is not usable there because btrfs_next_leaf() doesn't get
+ * the path for writing. The generic tree walk is unnecessary anyway: the next
+ * bitmap starts exactly where the current one ends, so when the current leaf
+ * is exhausted we simply search again (for writing) from that offset.
+ */
+static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root, struct btrfs_path *p)
+{
+	struct btrfs_key next;
+	int slot = p->slots[0];
+
+	if (slot + 1 >= btrfs_header_nritems(p->nodes[0])) {
+		/* Last item in this leaf: re-search at the end of this item. */
+		btrfs_item_key_to_cpu(p->nodes[0], &next, slot);
+		btrfs_release_path(p);
+
+		next.objectid += next.offset;
+		next.type = (u8)-1;
+		next.offset = (u64)-1;
+
+		return btrfs_search_prev_slot(trans, root, &next, p, 0, 1);
+	}
+
+	/* The next item lives in the same leaf. */
+	p->slots[0] = slot + 1;
+	return 0;
+}
+
+/*
+ * Add or remove the range [start, start + size) in a block group that stores
+ * its free space as bitmaps.
+ *
+ * If remove is 1, then we are removing free space, thus clearing bits in the
+ * bitmap. If remove is 0, then we are adding free space, thus setting bits in
+ * the bitmap.
+ *
+ * The bits immediately before and after the range are sampled to work out how
+ * the number of free space extents changes (a split, a merge, or neither), and
+ * that delta is passed to update_free_space_extent_count().
+ *
+ * Returns 0 on success or the nonzero result of a failed search or of the
+ * extent count update.
+ */
+static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path,
+ u64 start, u64 size, int remove)
+{
+ struct btrfs_root *root = fs_info->free_space_root;
+ struct btrfs_key key;
+ u64 end = start + size;
+ u64 cur_start, cur_size;
+ int prev_bit, next_bit;
+ int new_extents;
+ int ret;
+
+ /*
+ * Read the bit for the block immediately before the extent of space if
+ * that block is within the block group.
+ */
+ if (start > block_group->key.objectid) {
+ u64 prev_block = start - block_group->sectorsize;
+
+ key.objectid = prev_block;
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
+ if (ret)
+ goto out;
+
+ prev_bit = free_space_test_bit(block_group, path, prev_block);
+
+ /* The previous block may have been in the previous bitmap. */
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if (start >= key.objectid + key.offset) {
+ ret = free_space_next_bitmap(trans, root, path);
+ if (ret)
+ goto out;
+ }
+ } else {
+ key.objectid = start;
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
+ if (ret)
+ goto out;
+
+ /* No block precedes the range inside this block group. */
+ prev_bit = -1;
+ }
+
+ /*
+ * Iterate over all of the bitmaps overlapped by the extent of space,
+ * clearing/setting bits as required.
+ */
+ cur_start = start;
+ cur_size = size;
+ while (1) {
+ /* Advances cur_start and shrinks cur_size by what was handled. */
+ free_space_set_bits(block_group, path, &cur_start, &cur_size,
+ !remove);
+ if (cur_size == 0)
+ break;
+ ret = free_space_next_bitmap(trans, root, path);
+ if (ret)
+ goto out;
+ }
+
+ /*
+ * Read the bit for the block immediately after the extent of space if
+ * that block is within the block group.
+ */
+ if (end < block_group->key.objectid + block_group->key.offset) {
+ /* The next block may be in the next bitmap. */
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if (end >= key.objectid + key.offset) {
+ ret = free_space_next_bitmap(trans, root, path);
+ if (ret)
+ goto out;
+ }
+
+ next_bit = free_space_test_bit(block_group, path, end);
+ } else {
+ /* The range runs up to the end of the block group. */
+ next_bit = -1;
+ }
+
+ /*
+ * Work out the change in the number of free space extents. Removing a
+ * range destroys one extent but leaves a new one behind for each free
+ * neighbor; adding a range creates one extent but merges away one for
+ * each free neighbor. A neighbor bit of -1 (outside the block group)
+ * never counts.
+ */
+ if (remove) {
+ new_extents = -1;
+ if (prev_bit == 1) {
+ /* Leftover on the left. */
+ new_extents++;
+ }
+ if (next_bit == 1) {
+ /* Leftover on the right. */
+ new_extents++;
+ }
+ } else {
+ new_extents = 1;
+ if (prev_bit == 1) {
+ /* Merging with neighbor on the left. */
+ new_extents--;
+ }
+ if (next_bit == 1) {
+ /* Merging with neighbor on the right. */
+ new_extents--;
+ }
+ }
+
+ btrfs_release_path(path);
+ ret = update_free_space_extent_count(trans, fs_info, block_group, path,
+ new_extents);
+
+out:
+ return ret;
+}
+
+/*
+ * Remove the range [start, start + size) from a block group that stores its
+ * free space as extent items.
+ *
+ * The range is expected to lie entirely inside one existing free space extent
+ * item (asserted below). That item is deleted and replaced by zero, one or two
+ * items describing whatever is left over, and the block group's extent count
+ * is adjusted accordingly.
+ *
+ * Returns 0 on success or the nonzero result of the failing tree operation.
+ */
+static int remove_free_space_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path,
+ u64 start, u64 size)
+{
+ struct btrfs_root *root = fs_info->free_space_root;
+ struct btrfs_key key;
+ u64 found_start, found_end;
+ u64 end = start + size;
+ /* Deleting the found item removes one extent; leftovers add back. */
+ int new_extents = -1;
+ int ret;
+
+ /* Largest possible key at objectid == start, searched via prev slot. */
+ key.objectid = start;
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+ if (ret)
+ goto out;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+ ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
+
+ found_start = key.objectid;
+ found_end = key.objectid + key.offset;
+ ASSERT(start >= found_start && end <= found_end);
+
+ /*
+ * Okay, now that we've found the free space extent which contains the
+ * free space that we are removing, there are four cases:
+ *
+ * 1. We're using the whole extent: delete the key we found and
+ * decrement the free space extent count.
+ * 2. We are using part of the extent starting at the beginning: delete
+ * the key we found and insert a new key representing the leftover at
+ * the end. There is no net change in the number of extents.
+ * 3. We are using part of the extent ending at the end: delete the key
+ * we found and insert a new key representing the leftover at the
+ * beginning. There is no net change in the number of extents.
+ * 4. We are using part of the extent in the middle: delete the key we
+ * found and insert two new keys representing the leftovers on each
+ * side. Where we used to have one extent, we now have two, so increment
+ * the extent count. We may need to convert the block group to bitmaps
+ * as a result.
+ */
+
+ /* Delete the existing key (cases 1-4). */
+ ret = btrfs_del_item(trans, root, path);
+ if (ret)
+ goto out;
+
+ /* Add a key for leftovers at the beginning (cases 3 and 4). */
+ if (start > found_start) {
+ key.objectid = found_start;
+ key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
+ key.offset = start - found_start;
+
+ btrfs_release_path(path);
+ ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+ if (ret)
+ goto out;
+ new_extents++;
+ }
+
+ /* Add a key for leftovers at the end (cases 2 and 4). */
+ if (end < found_end) {
+ key.objectid = end;
+ key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
+ key.offset = found_end - end;
+
+ btrfs_release_path(path);
+ ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+ if (ret)
+ goto out;
+ new_extents++;
+ }
+
+ /* new_extents is now -1, 0 or +1 for cases 1, 2/3 and 4. */
+ btrfs_release_path(path);
+ ret = update_free_space_extent_count(trans, fs_info, block_group, path,
+ new_extents);
+
+out:
+ return ret;
+}
+
+/*
+ * Remove [start, start + size) from the free space recorded for @block_group,
+ * using whichever representation (bitmaps or extents) the block group's free
+ * space info item says is in use.
+ *
+ * If the block group has not been added to the free space tree yet
+ * (needs_free_space is set), it is added first.
+ *
+ * Returns 0 on success or an error from the lookup or the removal.
+ */
+int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
+				  struct btrfs_fs_info *fs_info,
+				  struct btrfs_block_group_cache *block_group,
+				  struct btrfs_path *path, u64 start, u64 size)
+{
+	struct btrfs_free_space_info *fsi;
+	u32 info_flags;
+	int err;
+
+	if (block_group->needs_free_space) {
+		err = __add_block_group_free_space(trans, fs_info, block_group,
+						   path);
+		if (err)
+			return err;
+	}
+
+	/* Read-only lookup: only the format flag is needed here. */
+	fsi = search_free_space_info(NULL, fs_info, block_group, path, 0);
+	if (IS_ERR(fsi))
+		return PTR_ERR(fsi);
+	info_flags = btrfs_free_space_flags(path->nodes[0], fsi);
+	btrfs_release_path(path);
+
+	if (!(info_flags & BTRFS_FREE_SPACE_USING_BITMAPS))
+		return remove_free_space_extent(trans, fs_info, block_group,
+						path, start, size);
+
+	return modify_free_space_bitmap(trans, fs_info, block_group, path,
+					start, size, 1);
+}
+
+/*
+ * Remove [start, start + size) from the free space tree.
+ *
+ * Does nothing and returns 0 when the FREE_SPACE_TREE compat_ro feature is not
+ * set. Otherwise looks up the block group containing @start and performs the
+ * removal under that block group's free_space_lock. Any failure aborts the
+ * transaction and is returned to the caller.
+ */
+int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
+				struct btrfs_fs_info *fs_info,
+				u64 start, u64 size)
+{
+	struct btrfs_block_group_cache *bg;
+	struct btrfs_path *path;
+	int ret;
+
+	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+		return 0;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+	} else {
+		bg = btrfs_lookup_block_group(fs_info, start);
+		if (!bg) {
+			ASSERT(0);
+			ret = -ENOENT;
+		} else {
+			mutex_lock(&bg->free_space_lock);
+			ret = __remove_from_free_space_tree(trans, fs_info, bg,
+							    path, start, size);
+			mutex_unlock(&bg->free_space_lock);
+
+			/* Drop the reference taken by the lookup. */
+			btrfs_put_block_group(bg);
+		}
+	}
+
+	btrfs_free_path(path);
+	if (ret)
+		btrfs_abort_transaction(trans, fs_info->free_space_root, ret);
+	return ret;
+}
+
+/*
+ * Add the range [start, start + size) to a block group that stores its free
+ * space as extent items, merging it with any free space extent that ends
+ * exactly at @start or begins exactly at the end of the range.
+ *
+ * Returns 0 on success or the nonzero result of the failing tree operation.
+ */
+static int add_free_space_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path,
+ u64 start, u64 size)
+{
+ struct btrfs_root *root = fs_info->free_space_root;
+ struct btrfs_key key, new_key;
+ u64 found_start, found_end;
+ u64 end = start + size;
+ /* Inserting the new key adds one extent; each merge takes one away. */
+ int new_extents = 1;
+ int ret;
+
+ /*
+ * We are adding a new extent of free space, but we need to merge
+ * extents. There are four cases here:
+ *
+ * 1. The new extent does not have any immediate neighbors to merge
+ * with: add the new key and increment the free space extent count. We
+ * may need to convert the block group to bitmaps as a result.
+ * 2. The new extent has an immediate neighbor before it: remove the
+ * previous key and insert a new key combining both of them. There is no
+ * net change in the number of extents.
+ * 3. The new extent has an immediate neighbor after it: remove the next
+ * key and insert a new key combining both of them. There is no net
+ * change in the number of extents.
+ * 4. The new extent has immediate neighbors on both sides: remove both
+ * of the keys and insert a new key combining all of them. Where we used
+ * to have two extents, we now have one, so decrement the extent count.
+ */
+
+ /* Start with just the new range; neighbors are folded in below. */
+ new_key.objectid = start;
+ new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
+ new_key.offset = size;
+
+ /* Search for a neighbor on the left. */
+ if (start == block_group->key.objectid)
+ goto right;
+ key.objectid = start - 1;
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+ if (ret)
+ goto out;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+ /* Landing on the info item means there is no extent to the left. */
+ if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
+ ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
+ btrfs_release_path(path);
+ goto right;
+ }
+
+ found_start = key.objectid;
+ found_end = key.objectid + key.offset;
+ ASSERT(found_start >= block_group->key.objectid &&
+ found_end > block_group->key.objectid);
+ ASSERT(found_start < start && found_end <= start);
+
+ /*
+ * Delete the neighbor on the left and absorb it into the new key (cases
+ * 2 and 4).
+ */
+ if (found_end == start) {
+ ret = btrfs_del_item(trans, root, path);
+ if (ret)
+ goto out;
+ new_key.objectid = found_start;
+ new_key.offset += key.offset;
+ new_extents--;
+ }
+ btrfs_release_path(path);
+
+right:
+ /* Search for a neighbor on the right. */
+ if (end == block_group->key.objectid + block_group->key.offset)
+ goto insert;
+ key.objectid = end;
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+ if (ret)
+ goto out;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+ /* Landing on the info item means there is no extent at or before end. */
+ if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
+ ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
+ btrfs_release_path(path);
+ goto insert;
+ }
+
+ found_start = key.objectid;
+ found_end = key.objectid + key.offset;
+ ASSERT(found_start >= block_group->key.objectid &&
+ found_end > block_group->key.objectid);
+ ASSERT((found_start < start && found_end <= start) ||
+ (found_start >= end && found_end > end));
+
+ /*
+ * Delete the neighbor on the right and absorb it into the new key
+ * (cases 3 and 4).
+ */
+ if (found_start == end) {
+ ret = btrfs_del_item(trans, root, path);
+ if (ret)
+ goto out;
+ new_key.offset += key.offset;
+ new_extents--;
+ }
+ btrfs_release_path(path);
+
+insert:
+ /* Insert the new key (cases 1-4). */
+ ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
+ if (ret)
+ goto out;
+
+ /* new_extents is now +1, 0 or -1 for cases 1, 2/3 and 4. */
+ btrfs_release_path(path);
+ ret = update_free_space_extent_count(trans, fs_info, block_group, path,
+ new_extents);
+
+out:
+ return ret;
+}
+
+/*
+ * Add [start, start + size) to the free space recorded for @block_group, using
+ * whichever representation (bitmaps or extents) the block group's free space
+ * info item says is in use.
+ *
+ * If the block group has not been added to the free space tree yet
+ * (needs_free_space is set), it is added first.
+ *
+ * Returns 0 on success or an error from the lookup or the insertion.
+ */
+int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
+			     struct btrfs_fs_info *fs_info,
+			     struct btrfs_block_group_cache *block_group,
+			     struct btrfs_path *path, u64 start, u64 size)
+{
+	struct btrfs_free_space_info *fsi;
+	u32 info_flags;
+	int err;
+
+	if (block_group->needs_free_space) {
+		err = __add_block_group_free_space(trans, fs_info, block_group,
+						   path);
+		if (err)
+			return err;
+	}
+
+	/* Read-only lookup: only the format flag is needed here. */
+	fsi = search_free_space_info(NULL, fs_info, block_group, path, 0);
+	if (IS_ERR(fsi))
+		return PTR_ERR(fsi);
+	info_flags = btrfs_free_space_flags(path->nodes[0], fsi);
+	btrfs_release_path(path);
+
+	if (!(info_flags & BTRFS_FREE_SPACE_USING_BITMAPS))
+		return add_free_space_extent(trans, fs_info, block_group, path,
+					     start, size);
+
+	return modify_free_space_bitmap(trans, fs_info, block_group, path,
+					start, size, 0);
+}
+
+/*
+ * Add [start, start + size) to the free space tree.
+ *
+ * Does nothing and returns 0 when the FREE_SPACE_TREE compat_ro feature is not
+ * set. Otherwise looks up the block group containing @start and performs the
+ * insertion under that block group's free_space_lock. Any failure aborts the
+ * transaction and is returned to the caller.
+ */
+int add_to_free_space_tree(struct btrfs_trans_handle *trans,
+			   struct btrfs_fs_info *fs_info,
+			   u64 start, u64 size)
+{
+	struct btrfs_block_group_cache *bg;
+	struct btrfs_path *path;
+	int ret;
+
+	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+		return 0;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+	} else {
+		bg = btrfs_lookup_block_group(fs_info, start);
+		if (!bg) {
+			ASSERT(0);
+			ret = -ENOENT;
+		} else {
+			mutex_lock(&bg->free_space_lock);
+			ret = __add_to_free_space_tree(trans, fs_info, bg, path,
+						       start, size);
+			mutex_unlock(&bg->free_space_lock);
+
+			/* Drop the reference taken by the lookup. */
+			btrfs_put_block_group(bg);
+		}
+	}
+
+	btrfs_free_path(path);
+	if (ret)
+		btrfs_abort_transaction(trans, fs_info->free_space_root, ret);
+	return ret;
+}
+
+/*
+ * Populate the free space tree by walking the extent tree. Operations on the
+ * extent tree that happen as a result of writes to the free space tree will go
+ * through the normal add/remove hooks.
+ *
+ * A free space info item is created for @block_group, then every gap between
+ * consecutive extent/metadata items inside the block group, plus the gap
+ * after the last one, is added as free space.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int populate_free_space_tree(struct btrfs_trans_handle *trans,
+				    struct btrfs_fs_info *fs_info,
+				    struct btrfs_block_group_cache *block_group)
+{
+	struct btrfs_root *extent_root = fs_info->extent_root;
+	struct btrfs_path *path, *path2;
+	struct btrfs_key key;
+	u64 start, end;
+	int ret;
+
+	/* path walks the extent tree; path2 is used for the free space tree. */
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	/*
+	 * The extent tree is walked strictly forward with btrfs_next_item(),
+	 * so ask for forward readahead by name instead of a raw integer.
+	 */
+	path->reada = READA_FORWARD;
+
+	path2 = btrfs_alloc_path();
+	if (!path2) {
+		btrfs_free_path(path);
+		return -ENOMEM;
+	}
+
+	ret = add_new_free_space_info(trans, fs_info, block_group, path2);
+	if (ret)
+		goto out;
+
+	mutex_lock(&block_group->free_space_lock);
+
+	/*
+	 * Iterate through all of the extent and metadata items in this block
+	 * group, adding the free space between them and the free space at the
+	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
+	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
+	 * contained in.
+	 */
+	key.objectid = block_group->key.objectid;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
+	if (ret < 0)
+		goto out_locked;
+	ASSERT(ret == 0);
+
+	/* start tracks the end of the last allocated extent seen so far. */
+	start = block_group->key.objectid;
+	end = block_group->key.objectid + block_group->key.offset;
+	while (1) {
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
+		    key.type == BTRFS_METADATA_ITEM_KEY) {
+			if (key.objectid >= end)
+				break;
+
+			/* The gap before this extent is free space. */
+			if (start < key.objectid) {
+				ret = __add_to_free_space_tree(trans, fs_info,
+							       block_group,
+							       path2, start,
+							       key.objectid -
+							       start);
+				if (ret)
+					goto out_locked;
+			}
+			start = key.objectid;
+			/*
+			 * The size of a metadata item is taken from the tree
+			 * node size; for an extent item it is key.offset.
+			 */
+			if (key.type == BTRFS_METADATA_ITEM_KEY)
+				start += fs_info->tree_root->nodesize;
+			else
+				start += key.offset;
+		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
+			/* Reached another block group's item: we are done. */
+			if (key.objectid != block_group->key.objectid)
+				break;
+		}
+
+		ret = btrfs_next_item(extent_root, path);
+		if (ret < 0)
+			goto out_locked;
+		if (ret)
+			break;
+	}
+	/* Whatever follows the last extent is free as well. */
+	if (start < end) {
+		ret = __add_to_free_space_tree(trans, fs_info, block_group,
+					       path2, start, end - start);
+		if (ret)
+			goto out_locked;
+	}
+
+	ret = 0;
+out_locked:
+	mutex_unlock(&block_group->free_space_lock);
+out:
+	btrfs_free_path(path2);
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Create the free space tree and fill it in for every block group.
+ *
+ * Runs in its own transaction: the tree root is created, each block group in
+ * fs_info->block_group_cache_tree is populated, the FREE_SPACE_TREE compat_ro
+ * flag is set and the transaction is committed. While this is in progress
+ * fs_info->creating_free_space_tree is set to 1.
+ *
+ * Returns 0 on success. On failure before the commit the transaction is
+ * aborted and ended and the error is returned; a commit failure is returned
+ * as is.
+ */
+int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *tree_root = fs_info->tree_root;
+ struct btrfs_root *free_space_root;
+ struct btrfs_block_group_cache *block_group;
+ struct rb_node *node;
+ int ret;
+
+ trans = btrfs_start_transaction(tree_root, 0);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ fs_info->creating_free_space_tree = 1;
+ free_space_root = btrfs_create_tree(trans, fs_info,
+ BTRFS_FREE_SPACE_TREE_OBJECTID);
+ if (IS_ERR(free_space_root)) {
+ ret = PTR_ERR(free_space_root);
+ goto abort;
+ }
+ fs_info->free_space_root = free_space_root;
+
+ /* Populate the new tree for every cached block group in turn. */
+ node = rb_first(&fs_info->block_group_cache_tree);
+ while (node) {
+ block_group = rb_entry(node, struct btrfs_block_group_cache,
+ cache_node);
+ ret = populate_free_space_tree(trans, fs_info, block_group);
+ if (ret)
+ goto abort;
+ node = rb_next(node);
+ }
+
+ /* Only advertise the feature once every block group is populated. */
+ btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+ fs_info->creating_free_space_tree = 0;
+
+ ret = btrfs_commit_transaction(trans, tree_root);
+ if (ret)
+ return ret;
+
+ return 0;
+
+abort:
+ fs_info->creating_free_space_tree = 0;
+ btrfs_abort_transaction(trans, tree_root, ret);
+ btrfs_end_transaction(trans, tree_root);
+ return ret;
+}
+
+/*
+ * Delete every item in the tree rooted at @root, one leaf at a time.
+ *
+ * Each pass searches for the smallest possible key, deletes all items in the
+ * leaf the search lands on, and repeats until that leaf is empty.
+ *
+ * Returns 0 on success or the error from the search or the deletion.
+ */
+static int clear_free_space_tree(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root)
+{
+	struct btrfs_path *path;
+	struct btrfs_key first_key;
+	int nritems;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	path->leave_spinning = 1;
+
+	/* (0, 0, 0) sorts before every real key in the tree. */
+	first_key.objectid = 0;
+	first_key.type = 0;
+	first_key.offset = 0;
+
+	for (;;) {
+		ret = btrfs_search_slot(trans, root, &first_key, path, -1, 1);
+		if (ret < 0)
+			break;
+
+		nritems = btrfs_header_nritems(path->nodes[0]);
+		if (!nritems) {
+			/* Empty leaf: nothing is left to delete. */
+			ret = 0;
+			break;
+		}
+
+		path->slots[0] = 0;
+		ret = btrfs_del_items(trans, root, path, 0, nritems);
+		if (ret)
+			break;
+
+		btrfs_release_path(path);
+	}
+
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Remove the free space tree from the filesystem.
+ *
+ * Runs in its own transaction: the FREE_SPACE_TREE compat_ro flag is cleared,
+ * fs_info->free_space_root is reset to NULL, all items in the tree are
+ * deleted, the root item is removed from the tree root, the root node block is
+ * freed and the in-memory root is released before the transaction is
+ * committed.
+ *
+ * Returns 0 on success. On failure before the commit the transaction is
+ * aborted and ended and the error is returned; a commit failure is returned
+ * as is.
+ */
+int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *tree_root = fs_info->tree_root;
+ /* Saved here because fs_info->free_space_root is cleared below. */
+ struct btrfs_root *free_space_root = fs_info->free_space_root;
+ int ret;
+
+ trans = btrfs_start_transaction(tree_root, 0);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+ fs_info->free_space_root = NULL;
+
+ ret = clear_free_space_tree(trans, free_space_root);
+ if (ret)
+ goto abort;
+
+ ret = btrfs_del_root(trans, tree_root, &free_space_root->root_key);
+ if (ret)
+ goto abort;
+
+ list_del(&free_space_root->dirty_list);
+
+ /* Clean and free the (now empty) root node of the tree. */
+ btrfs_tree_lock(free_space_root->node);
+ clean_tree_block(trans, tree_root->fs_info, free_space_root->node);
+ btrfs_tree_unlock(free_space_root->node);
+ btrfs_free_tree_block(trans, free_space_root, free_space_root->node,
+ 0, 1);
+
+ /* Drop the buffer references and the in-memory root itself. */
+ free_extent_buffer(free_space_root->node);
+ free_extent_buffer(free_space_root->commit_root);
+ kfree(free_space_root);
+
+ ret = btrfs_commit_transaction(trans, tree_root);
+ if (ret)
+ return ret;
+
+ return 0;
+
+abort:
+ btrfs_abort_transaction(trans, tree_root, ret);
+ btrfs_end_transaction(trans, tree_root);
+ return ret;
+}
+
+/*
+ * Add a brand new block group to the free space tree: create its free space
+ * info item and record the whole block group as free space.
+ *
+ * needs_free_space is cleared up front so that the nested call to
+ * __add_to_free_space_tree() does not come back here again.
+ *
+ * Returns 0 on success or the error from either step.
+ */
+static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
+					struct btrfs_fs_info *fs_info,
+					struct btrfs_block_group_cache *block_group,
+					struct btrfs_path *path)
+{
+	int ret;
+
+	block_group->needs_free_space = 0;
+
+	ret = add_new_free_space_info(trans, fs_info, block_group, path);
+	if (ret)
+		return ret;
+
+	/* The entire range of a new block group is free. */
+	return __add_to_free_space_tree(trans, fs_info, block_group, path,
+					block_group->key.objectid,
+					block_group->key.offset);
+}
+
+/*
+ * Add @block_group to the free space tree if that has not happened yet.
+ *
+ * Does nothing and returns 0 when the FREE_SPACE_TREE compat_ro feature is not
+ * set or when the block group no longer has needs_free_space set. The check
+ * and the insertion both happen under the block group's free_space_lock. Any
+ * failure aborts the transaction and is returned to the caller.
+ */
+int add_block_group_free_space(struct btrfs_trans_handle *trans,
+			       struct btrfs_fs_info *fs_info,
+			       struct btrfs_block_group_cache *block_group)
+{
+	struct btrfs_path *path;
+	int ret = 0;
+
+	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+		return 0;
+
+	mutex_lock(&block_group->free_space_lock);
+	if (block_group->needs_free_space) {
+		path = btrfs_alloc_path();
+		if (path) {
+			ret = __add_block_group_free_space(trans, fs_info,
+							   block_group, path);
+			btrfs_free_path(path);
+		} else {
+			ret = -ENOMEM;
+		}
+	}
+	mutex_unlock(&block_group->free_space_lock);
+
+	if (ret)
+		btrfs_abort_transaction(trans, fs_info->free_space_root, ret);
+	return ret;
+}
+
+/*
+ * Delete every free space tree item that belongs to @block_group: its extent
+ * or bitmap items and finally its free space info item.
+ *
+ * Does nothing and returns 0 when the FREE_SPACE_TREE compat_ro feature is not
+ * set or when the block group was never added to the tree. Items are removed
+ * leaf by leaf, walking backwards from the end of the block group until the
+ * info item has been deleted.
+ *
+ * Returns 0 on success. On failure the transaction is aborted and the error is
+ * returned; an item of an unexpected type is reported as -EIO.
+ */
+int remove_block_group_free_space(struct btrfs_trans_handle *trans,
+				  struct btrfs_fs_info *fs_info,
+				  struct btrfs_block_group_cache *block_group)
+{
+	struct btrfs_root *root = fs_info->free_space_root;
+	struct btrfs_path *path;
+	struct btrfs_key key, found_key;
+	struct extent_buffer *leaf;
+	u64 start, end;
+	int done = 0, nr;
+	int ret;
+
+	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+		return 0;
+
+	if (block_group->needs_free_space) {
+		/* We never added this block group to the free space tree. */
+		return 0;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	start = block_group->key.objectid;
+	end = block_group->key.objectid + block_group->key.offset;
+
+	/* Largest possible key inside the block group. */
+	key.objectid = end - 1;
+	key.type = (u8)-1;
+	key.offset = (u64)-1;
+
+	while (!done) {
+		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+		if (ret)
+			goto out;
+
+		leaf = path->nodes[0];
+		nr = 0;
+		/*
+		 * Walk backwards through this leaf. slots[0] is kept one past
+		 * the item being examined so that, when the walk stops, it is
+		 * the first slot to delete and nr is the number of items.
+		 */
+		path->slots[0]++;
+		while (path->slots[0] > 0) {
+			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
+
+			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
+				ASSERT(found_key.objectid == block_group->key.objectid);
+				ASSERT(found_key.offset == block_group->key.offset);
+				/* The info item is the last one to remove. */
+				done = 1;
+				nr++;
+				path->slots[0]--;
+				break;
+			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
+				   found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
+				ASSERT(found_key.objectid >= start);
+				ASSERT(found_key.objectid < end);
+				ASSERT(found_key.objectid + found_key.offset <= end);
+				nr++;
+				path->slots[0]--;
+			} else {
+				/*
+				 * Unknown item type. Bail out explicitly: in
+				 * builds where ASSERT() is a no-op, this branch
+				 * would otherwise leave slots[0] untouched and
+				 * spin in this loop forever.
+				 */
+				ASSERT(0);
+				ret = -EIO;
+				goto out;
+			}
+		}
+
+		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
+		if (ret)
+			goto out;
+		btrfs_release_path(path);
+	}
+
+	ret = 0;
+out:
+	btrfs_free_path(path);
+	if (ret)
+		btrfs_abort_transaction(trans, root, ret);
+	return ret;
+}
+
+/*
+ * Load the free space of a bitmap-format block group into the in-memory free
+ * space cache.
+ *
+ * Starting from the item after the one @path points to, every bitmap item up
+ * to the next free space info item (or the end of the tree) is scanned one
+ * block at a time. Each run of set bits is handed to add_new_free_space(), and
+ * the number of runs is checked against @expected_extent_count.
+ *
+ * Returns 0 on success, a negative error from btrfs_next_item(), or -EIO if
+ * the number of runs does not match.
+ */
+static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
+ struct btrfs_path *path,
+ u32 expected_extent_count)
+{
+ struct btrfs_block_group_cache *block_group;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_root *root;
+ struct btrfs_key key;
+ /* prev_bit carries over between bitmap items so runs can span them. */
+ int prev_bit = 0, bit;
+ /* Initialize to silence GCC. */
+ u64 extent_start = 0;
+ u64 end, offset;
+ u64 total_found = 0;
+ u32 extent_count = 0;
+ int ret;
+
+ block_group = caching_ctl->block_group;
+ fs_info = block_group->fs_info;
+ root = fs_info->free_space_root;
+
+ end = block_group->key.objectid + block_group->key.offset;
+
+ while (1) {
+ ret = btrfs_next_item(root, path);
+ if (ret < 0)
+ goto out;
+ if (ret)
+ break;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+ /* The next info item starts the following block group. */
+ if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
+ break;
+
+ ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
+ ASSERT(key.objectid < end && key.objectid + key.offset <= end);
+
+ caching_ctl->progress = key.objectid;
+
+ offset = key.objectid;
+ while (offset < key.objectid + key.offset) {
+ bit = free_space_test_bit(block_group, path, offset);
+ if (prev_bit == 0 && bit == 1) {
+ /* 0 -> 1 transition: a free extent starts here. */
+ extent_start = offset;
+ } else if (prev_bit == 1 && bit == 0) {
+ /* 1 -> 0 transition: the free extent ended. */
+ total_found += add_new_free_space(block_group,
+ fs_info,
+ extent_start,
+ offset);
+ /* Wake waiters each time enough space was found. */
+ if (total_found > CACHING_CTL_WAKE_UP) {
+ total_found = 0;
+ wake_up(&caching_ctl->wait);
+ }
+ extent_count++;
+ }
+ prev_bit = bit;
+ offset += block_group->sectorsize;
+ }
+ }
+ /* A run still open at this point extends to the end of the block group. */
+ if (prev_bit == 1) {
+ total_found += add_new_free_space(block_group, fs_info,
+ extent_start, end);
+ extent_count++;
+ }
+
+ if (extent_count != expected_extent_count) {
+ btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u",
+ block_group->key.objectid, extent_count,
+ expected_extent_count);
+ ASSERT(0);
+ ret = -EIO;
+ goto out;
+ }
+
+ /* Mark caching progress as complete. */
+ caching_ctl->progress = (u64)-1;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/*
+ * Load the free space of an extent-format block group into the in-memory free
+ * space cache.
+ *
+ * Starting from the item after the one @path points to, every free space
+ * extent item up to the next free space info item (or the end of the tree) is
+ * handed to add_new_free_space(), and the number of items is checked against
+ * @expected_extent_count.
+ *
+ * Returns 0 on success, a negative error from btrfs_next_item(), or -EIO if
+ * the number of extents does not match.
+ */
+static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
+				   struct btrfs_path *path,
+				   u32 expected_extent_count)
+{
+	struct btrfs_block_group_cache *block_group = caching_ctl->block_group;
+	struct btrfs_fs_info *fs_info = block_group->fs_info;
+	struct btrfs_root *root = fs_info->free_space_root;
+	struct btrfs_key key;
+	u64 bg_end = block_group->key.objectid + block_group->key.offset;
+	u64 found = 0;
+	u32 nr_extents = 0;
+	int ret;
+
+	for (;;) {
+		ret = btrfs_next_item(root, path);
+		if (ret < 0)
+			return ret;
+		if (ret)
+			break;
+
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+		/* The next info item starts the following block group. */
+		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
+			break;
+
+		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
+		ASSERT(key.objectid < bg_end &&
+		       key.objectid + key.offset <= bg_end);
+
+		caching_ctl->progress = key.objectid;
+
+		found += add_new_free_space(block_group, fs_info, key.objectid,
+					    key.objectid + key.offset);
+		/* Wake waiters each time enough space was found. */
+		if (found > CACHING_CTL_WAKE_UP) {
+			found = 0;
+			wake_up(&caching_ctl->wait);
+		}
+		nr_extents++;
+	}
+
+	if (nr_extents != expected_extent_count) {
+		btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u",
+			  block_group->key.objectid, nr_extents,
+			  expected_extent_count);
+		ASSERT(0);
+		return -EIO;
+	}
+
+	/* Mark caching progress as complete. */
+	caching_ctl->progress = (u64)-1;
+	return 0;
+}
+
+/*
+ * Load the free space of the block group being cached from the free space
+ * tree into the in-memory free space cache.
+ *
+ * The block group's free space info item is looked up in the commit root
+ * without locking, and the extent count and format flag stored there select
+ * and parameterize the extent or bitmap loader.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
+{
+	struct btrfs_block_group_cache *block_group;
+	struct btrfs_fs_info *fs_info;
+	struct btrfs_free_space_info *info;
+	struct btrfs_path *path;
+	u32 extent_count, flags;
+	int ret;
+
+	block_group = caching_ctl->block_group;
+	fs_info = block_group->fs_info;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/*
+	 * Just like caching_thread() doesn't want to deadlock on the extent
+	 * tree, we don't want to deadlock on the free space tree.
+	 */
+	path->skip_locking = 1;
+	path->search_commit_root = 1;
+	/*
+	 * The loaders walk forward with btrfs_next_item(), so ask for forward
+	 * readahead by name instead of a raw integer.
+	 */
+	path->reada = READA_FORWARD;
+
+	info = search_free_space_info(NULL, fs_info, block_group, path, 0);
+	if (IS_ERR(info)) {
+		ret = PTR_ERR(info);
+		goto out;
+	}
+	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
+	flags = btrfs_free_space_flags(path->nodes[0], info);
+
+	/*
+	 * We left path pointing to the free space info item, so now
+	 * load_free_space_foo can just iterate through the free space tree from
+	 * there.
+	 */
+	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
+		ret = load_free_space_bitmaps(caching_ctl, path, extent_count);
+	else
+		ret = load_free_space_extents(caching_ctl, path, extent_count);
+
+out:
+	btrfs_free_path(path);
+	return ret;
+}
diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h
new file mode 100644
index 000000000..54ffced3b
--- /dev/null
+++ b/fs/btrfs/free-space-tree.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2015 Facebook. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_FREE_SPACE_TREE
+#define __BTRFS_FREE_SPACE_TREE
+
+/*
+ * The default size for new free space bitmap items. The last bitmap in a block
+ * group may be truncated, and none of the free space tree code assumes that
+ * existing bitmaps are this size.
+ */
+#define BTRFS_FREE_SPACE_BITMAP_SIZE 256
+#define BTRFS_FREE_SPACE_BITMAP_BITS (BTRFS_FREE_SPACE_BITMAP_SIZE * BITS_PER_BYTE)
+
+void set_free_space_tree_thresholds(struct btrfs_block_group_cache *block_group);
+int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info);
+int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info);
+int load_free_space_tree(struct btrfs_caching_control *caching_ctl);
+int add_block_group_free_space(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group);
+int remove_block_group_free_space(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group);
+int add_to_free_space_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ u64 start, u64 size);
+int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ u64 start, u64 size);
+
+/* Exposed for testing. */
+struct btrfs_free_space_info *
+search_free_space_info(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path, int cow);
+int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path, u64 start, u64 size);
+int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path, u64 start, u64 size);
+int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path);
+int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path);
+int free_space_test_bit(struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path, u64 offset);
+
+#endif
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 07573dc16..e50316c4a 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -48,7 +48,7 @@ static int caching_kthread(void *data)
/* Since the commit root is read-only, we can safely skip locking. */
path->skip_locking = 1;
path->search_commit_root = 1;
- path->reada = 2;
+ path->reada = READA_FORWARD;
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.offset = 0;
@@ -282,7 +282,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
}
}
-#define INIT_THRESHOLD (((1024 * 32) / 2) / sizeof(struct btrfs_free_space))
+#define INIT_THRESHOLD ((SZ_32K / 2) / sizeof(struct btrfs_free_space))
#define INODES_PER_BITMAP (PAGE_CACHE_SIZE * 8)
/*
@@ -334,7 +334,7 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
return true;
}
-static struct btrfs_free_space_op free_ino_op = {
+static const struct btrfs_free_space_op free_ino_op = {
.recalc_thresholds = recalculate_thresholds,
.use_bitmap = use_bitmap,
};
@@ -356,7 +356,7 @@ static bool pinned_use_bitmap(struct btrfs_free_space_ctl *ctl,
return false;
}
-static struct btrfs_free_space_op pinned_free_ino_op = {
+static const struct btrfs_free_space_op pinned_free_ino_op = {
.recalc_thresholds = pinned_recalc_thresholds,
.use_bitmap = pinned_use_bitmap,
};
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4bc9dbf29..d96f5cf38 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -66,6 +66,13 @@ struct btrfs_iget_args {
struct btrfs_root *root;
};
+struct btrfs_dio_data {
+ u64 outstanding_extents;
+ u64 reserve;
+ u64 unsubmitted_oe_range_start;
+ u64 unsubmitted_oe_range_end;
+};
+
static const struct inode_operations btrfs_dir_inode_operations;
static const struct inode_operations btrfs_symlink_inode_operations;
static const struct inode_operations btrfs_dir_ro_inode_operations;
@@ -74,17 +81,16 @@ static const struct inode_operations btrfs_file_inode_operations;
static const struct address_space_operations btrfs_aops;
static const struct address_space_operations btrfs_symlink_aops;
static const struct file_operations btrfs_dir_file_operations;
-static struct extent_io_ops btrfs_extent_io_ops;
+static const struct extent_io_ops btrfs_extent_io_ops;
static struct kmem_cache *btrfs_inode_cachep;
-static struct kmem_cache *btrfs_delalloc_work_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_path_cachep;
struct kmem_cache *btrfs_free_space_cachep;
#define S_SHIFT 12
-static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
+static const unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
[S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
[S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
[S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
@@ -414,15 +420,15 @@ static noinline void compress_file_range(struct inode *inode,
unsigned long nr_pages_ret = 0;
unsigned long total_compressed = 0;
unsigned long total_in = 0;
- unsigned long max_compressed = 128 * 1024;
- unsigned long max_uncompressed = 128 * 1024;
+ unsigned long max_compressed = SZ_128K;
+ unsigned long max_uncompressed = SZ_128K;
int i;
int will_compress;
int compress_type = root->fs_info->compress_type;
int redirty = 0;
/* if this is a small write inside eof, kick off a defrag */
- if ((end - start + 1) < 16 * 1024 &&
+ if ((end - start + 1) < SZ_16K &&
(start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
btrfs_add_inode_defrag(NULL, inode);
@@ -430,7 +436,7 @@ static noinline void compress_file_range(struct inode *inode,
again:
will_compress = 0;
nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1;
- nr_pages = min(nr_pages, (128 * 1024UL) / PAGE_CACHE_SIZE);
+ nr_pages = min_t(unsigned long, nr_pages, SZ_128K / PAGE_CACHE_SIZE);
/*
* we don't want to send crud past the end of i_size through
@@ -944,7 +950,7 @@ static noinline int cow_file_range(struct inode *inode,
disk_num_bytes = num_bytes;
/* if this is a small write inside eof, kick off defrag */
- if (num_bytes < 64 * 1024 &&
+ if (num_bytes < SZ_64K &&
(start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
btrfs_add_inode_defrag(NULL, inode);
@@ -1107,7 +1113,7 @@ static noinline void async_cow_submit(struct btrfs_work *work)
* atomic_sub_return implies a barrier for waitqueue_active
*/
if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
- 5 * 1024 * 1024 &&
+ 5 * SZ_1M &&
waitqueue_active(&root->fs_info->async_submit_wait))
wake_up(&root->fs_info->async_submit_wait);
@@ -1132,7 +1138,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
struct btrfs_root *root = BTRFS_I(inode)->root;
unsigned long nr_pages;
u64 cur_end;
- int limit = 10 * 1024 * 1024;
+ int limit = 10 * SZ_1M;
clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
1, 0, NULL, GFP_NOFS);
@@ -1148,7 +1154,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
!btrfs_test_opt(root, FORCE_COMPRESS))
cur_end = end;
else
- cur_end = min(end, start + 512 * 1024 - 1);
+ cur_end = min(end, start + SZ_512K - 1);
async_cow->end = cur_end;
INIT_LIST_HEAD(&async_cow->extents);
@@ -1989,7 +1995,7 @@ again:
page_start = page_offset(page);
page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
- lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0,
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
&cached_state);
/* already ordered? We're done */
@@ -2482,7 +2488,7 @@ static noinline int relink_extent_backref(struct btrfs_path *path,
lock_start = backref->file_pos;
lock_end = backref->file_pos + backref->num_bytes - 1;
lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
- 0, &cached);
+ &cached);
ordered = btrfs_lookup_first_ordered_extent(inode, lock_end);
if (ordered) {
@@ -2874,7 +2880,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
lock_extent_bits(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset + ordered_extent->len - 1,
- 0, &cached_state);
+ &cached_state);
ret = test_range_bit(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset + ordered_extent->len - 1,
@@ -3106,52 +3112,46 @@ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
start, (size_t)(end - start + 1));
}
-struct delayed_iput {
- struct list_head list;
- struct inode *inode;
-};
-
-/* JDM: If this is fs-wide, why can't we add a pointer to
- * btrfs_inode instead and avoid the allocation? */
void btrfs_add_delayed_iput(struct inode *inode)
{
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
- struct delayed_iput *delayed;
+ struct btrfs_inode *binode = BTRFS_I(inode);
if (atomic_add_unless(&inode->i_count, -1, 1))
return;
- delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
- delayed->inode = inode;
-
spin_lock(&fs_info->delayed_iput_lock);
- list_add_tail(&delayed->list, &fs_info->delayed_iputs);
+ if (binode->delayed_iput_count == 0) {
+ ASSERT(list_empty(&binode->delayed_iput));
+ list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
+ } else {
+ binode->delayed_iput_count++;
+ }
spin_unlock(&fs_info->delayed_iput_lock);
}
void btrfs_run_delayed_iputs(struct btrfs_root *root)
{
- LIST_HEAD(list);
struct btrfs_fs_info *fs_info = root->fs_info;
- struct delayed_iput *delayed;
- int empty;
spin_lock(&fs_info->delayed_iput_lock);
- empty = list_empty(&fs_info->delayed_iputs);
- spin_unlock(&fs_info->delayed_iput_lock);
- if (empty)
- return;
-
- spin_lock(&fs_info->delayed_iput_lock);
- list_splice_init(&fs_info->delayed_iputs, &list);
- spin_unlock(&fs_info->delayed_iput_lock);
-
- while (!list_empty(&list)) {
- delayed = list_entry(list.next, struct delayed_iput, list);
- list_del(&delayed->list);
- iput(delayed->inode);
- kfree(delayed);
+ while (!list_empty(&fs_info->delayed_iputs)) {
+ struct btrfs_inode *inode;
+
+ inode = list_first_entry(&fs_info->delayed_iputs,
+ struct btrfs_inode, delayed_iput);
+ if (inode->delayed_iput_count) {
+ inode->delayed_iput_count--;
+ list_move_tail(&inode->delayed_iput,
+ &fs_info->delayed_iputs);
+ } else {
+ list_del_init(&inode->delayed_iput);
+ }
+ spin_unlock(&fs_info->delayed_iput_lock);
+ iput(&inode->vfs_inode);
+ spin_lock(&fs_info->delayed_iput_lock);
}
+ spin_unlock(&fs_info->delayed_iput_lock);
}
/*
@@ -3347,7 +3347,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
ret = -ENOMEM;
goto out;
}
- path->reada = -1;
+ path->reada = READA_BACK;
key.objectid = BTRFS_ORPHAN_OBJECTID;
key.type = BTRFS_ORPHAN_ITEM_KEY;
@@ -3546,10 +3546,10 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
int scanned = 0;
if (!xattr_access) {
- xattr_access = btrfs_name_hash(POSIX_ACL_XATTR_ACCESS,
- strlen(POSIX_ACL_XATTR_ACCESS));
- xattr_default = btrfs_name_hash(POSIX_ACL_XATTR_DEFAULT,
- strlen(POSIX_ACL_XATTR_DEFAULT));
+ xattr_access = btrfs_name_hash(XATTR_NAME_POSIX_ACL_ACCESS,
+ strlen(XATTR_NAME_POSIX_ACL_ACCESS));
+ xattr_default = btrfs_name_hash(XATTR_NAME_POSIX_ACL_DEFAULT,
+ strlen(XATTR_NAME_POSIX_ACL_DEFAULT));
}
slot++;
@@ -3770,6 +3770,7 @@ cache_acl:
break;
case S_IFLNK:
inode->i_op = &btrfs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &btrfs_symlink_aops;
break;
default:
@@ -4313,7 +4314,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- path->reada = -1;
+ path->reada = READA_BACK;
/*
* We want to drop from the next block forward in case this new size is
@@ -4344,7 +4345,7 @@ search_again:
* up a huge file in a single leaf. Most of the time that
* bytes_deleted is > 0, it will be huge by the time we get here
*/
- if (be_nice && bytes_deleted > 32 * 1024 * 1024) {
+ if (be_nice && bytes_deleted > SZ_32M) {
if (btrfs_should_end_transaction(trans, root)) {
err = -EAGAIN;
goto error;
@@ -4587,7 +4588,7 @@ error:
btrfs_free_path(path);
- if (be_nice && bytes_deleted > 32 * 1024 * 1024) {
+ if (be_nice && bytes_deleted > SZ_32M) {
unsigned long updates = trans->delayed_ref_updates;
if (updates) {
trans->delayed_ref_updates = 0;
@@ -4664,7 +4665,7 @@ again:
}
wait_on_page_writeback(page);
- lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state);
+ lock_extent_bits(io_tree, page_start, page_end, &cached_state);
set_page_extent_mapped(page);
ordered = btrfs_lookup_ordered_extent(inode, page_start);
@@ -4795,7 +4796,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
while (1) {
struct btrfs_ordered_extent *ordered;
- lock_extent_bits(io_tree, hole_start, block_end - 1, 0,
+ lock_extent_bits(io_tree, hole_start, block_end - 1,
&cached_state);
ordered = btrfs_lookup_ordered_range(inode, hole_start,
block_end - hole_start);
@@ -4871,26 +4872,6 @@ next:
return err;
}
-static int wait_snapshoting_atomic_t(atomic_t *a)
-{
- schedule();
- return 0;
-}
-
-static void wait_for_snapshot_creation(struct btrfs_root *root)
-{
- while (true) {
- int ret;
-
- ret = btrfs_start_write_no_snapshoting(root);
- if (ret)
- break;
- wait_on_atomic_t(&root->will_be_snapshoted,
- wait_snapshoting_atomic_t,
- TASK_UNINTERRUPTIBLE);
- }
-}
-
static int btrfs_setsize(struct inode *inode, struct iattr *attr)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -4922,7 +4903,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
* truncation, it must capture all writes that happened before
* this truncation.
*/
- wait_for_snapshot_creation(root);
+ btrfs_wait_for_snapshot_creation(root);
ret = btrfs_cont_expand(inode, oldsize, newsize);
if (ret) {
btrfs_end_write_no_snapshoting(root);
@@ -5107,7 +5088,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
end = state->end;
spin_unlock(&io_tree->lock);
- lock_extent_bits(io_tree, start, end, 0, &cached_state);
+ lock_extent_bits(io_tree, start, end, &cached_state);
/*
* If still has DELALLOC flag, the extent didn't reach disk,
@@ -5300,7 +5281,6 @@ void btrfs_evict_inode(struct inode *inode)
no_delete:
btrfs_remove_delayed_node(inode);
clear_inode(inode);
- return;
}
/*
@@ -5750,7 +5730,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
if (!path)
return -ENOMEM;
- path->reada = 1;
+ path->reada = READA_FORWARD;
if (key_type == BTRFS_DIR_INDEX_KEY) {
INIT_LIST_HEAD(&ins_list);
@@ -6697,7 +6677,7 @@ static int merge_extent_mapping(struct extent_map_tree *em_tree,
}
static noinline int uncompress_inline(struct btrfs_path *path,
- struct inode *inode, struct page *page,
+ struct page *page,
size_t pg_offset, u64 extent_offset,
struct btrfs_file_extent_item *item)
{
@@ -6794,7 +6774,7 @@ again:
* Chances are we'll be called again, so go ahead and do
* readahead
*/
- path->reada = 1;
+ path->reada = READA_FORWARD;
}
ret = btrfs_lookup_file_extent(trans, root, path,
@@ -6893,8 +6873,7 @@ next:
if (create == 0 && !PageUptodate(page)) {
if (btrfs_file_extent_compression(leaf, item) !=
BTRFS_COMPRESS_NONE) {
- ret = uncompress_inline(path, inode, page,
- pg_offset,
+ ret = uncompress_inline(path, page, pg_offset,
extent_offset, item);
if (ret) {
err = ret;
@@ -7149,21 +7128,41 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
if (ret)
return ERR_PTR(ret);
- em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
- ins.offset, ins.offset, ins.offset, 0);
- if (IS_ERR(em)) {
- btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
- return em;
- }
-
+ /*
+ * Create the ordered extent before the extent map. This is to avoid
+ * races with the fast fsync path that would lead to it logging file
+ * extent items that point to disk extents that were not yet written to.
+ * The fast fsync path collects ordered extents into a local list and
+ * then collects all the new extent maps, so we must create the ordered
+ * extent first and make sure the fast fsync path collects any new
+ * ordered extents after collecting new extent maps as well.
+ * The fsync path simply cannot rely on inode_dio_wait() because it
+ * causes a deadlock with AIO.
+ */
ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
ins.offset, ins.offset, 0);
if (ret) {
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
- free_extent_map(em);
return ERR_PTR(ret);
}
+ em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+ ins.offset, ins.offset, ins.offset, 0);
+ if (IS_ERR(em)) {
+ struct btrfs_ordered_extent *oe;
+
+ btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
+ oe = btrfs_lookup_ordered_extent(inode, start);
+ ASSERT(oe);
+ if (WARN_ON(!oe))
+ return em;
+ set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
+ set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
+ btrfs_remove_ordered_extent(inode, oe);
+ /* Once for our lookup and once for the ordered extents tree. */
+ btrfs_put_ordered_extent(oe);
+ btrfs_put_ordered_extent(oe);
+ }
return em;
}
@@ -7390,7 +7389,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
while (1) {
lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- 0, cached_state);
+ cached_state);
/*
* We're concerned with the entire range that we're going to be
* doing DIO to, so we need to make sure theres no ordered
@@ -7418,25 +7417,21 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
} else {
- /* Screw you mmap */
- ret = btrfs_fdatawrite_range(inode, lockstart, lockend);
- if (ret)
- break;
- ret = filemap_fdatawait_range(inode->i_mapping,
- lockstart,
- lockend);
- if (ret)
- break;
-
/*
- * If we found a page that couldn't be invalidated just
- * fall back to buffered.
+ * We could trigger writeback for this range (and wait
+ * for it to complete) and then invalidate the pages for
+ * this range (through invalidate_inode_pages2_range()),
+ * but that can lead us to a deadlock with a concurrent
+ * call to readpages() (a buffered read or a defrag call
+ * triggered a readahead) on a page lock due to an
+ * ordered dio extent we created before but which does not
+ * yet have a corresponding bio submitted (hence it cannot
+ * complete), which makes readpages() wait for that
+ * ordered extent to complete while holding a lock on
+ * that page.
*/
- ret = invalidate_inode_pages2_range(inode->i_mapping,
- lockstart >> PAGE_CACHE_SHIFT,
- lockend >> PAGE_CACHE_SHIFT);
- if (ret)
- break;
+ ret = -ENOTBLK;
+ break;
}
cond_resched();
@@ -7492,11 +7487,6 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
return em;
}
-struct btrfs_dio_data {
- u64 outstanding_extents;
- u64 reserve;
-};
-
static void adjust_dio_outstanding_extents(struct inode *inode,
struct btrfs_dio_data *dio_data,
const u64 len)
@@ -7680,6 +7670,7 @@ unlock:
btrfs_free_reserved_data_space(inode, start, len);
WARN_ON(dio_data->reserve < len);
dio_data->reserve -= len;
+ dio_data->unsubmitted_oe_range_end = start + len;
current->journal_info = dio_data;
}
@@ -8003,22 +7994,22 @@ static void btrfs_endio_direct_read(struct bio *bio)
bio_put(bio);
}
-static void btrfs_endio_direct_write(struct bio *bio)
+static void btrfs_endio_direct_write_update_ordered(struct inode *inode,
+ const u64 offset,
+ const u64 bytes,
+ const int uptodate)
{
- struct btrfs_dio_private *dip = bio->bi_private;
- struct inode *inode = dip->inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ordered_extent *ordered = NULL;
- u64 ordered_offset = dip->logical_offset;
- u64 ordered_bytes = dip->bytes;
- struct bio *dio_bio;
+ u64 ordered_offset = offset;
+ u64 ordered_bytes = bytes;
int ret;
again:
ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
&ordered_offset,
ordered_bytes,
- !bio->bi_error);
+ uptodate);
if (!ret)
goto out_test;
@@ -8031,13 +8022,22 @@ out_test:
* our bio might span multiple ordered extents. If we haven't
* completed the accounting for the whole dio, go back and try again
*/
- if (ordered_offset < dip->logical_offset + dip->bytes) {
- ordered_bytes = dip->logical_offset + dip->bytes -
- ordered_offset;
+ if (ordered_offset < offset + bytes) {
+ ordered_bytes = offset + bytes - ordered_offset;
ordered = NULL;
goto again;
}
- dio_bio = dip->dio_bio;
+}
+
+static void btrfs_endio_direct_write(struct bio *bio)
+{
+ struct btrfs_dio_private *dip = bio->bi_private;
+ struct bio *dio_bio = dip->dio_bio;
+
+ btrfs_endio_direct_write_update_ordered(dip->inode,
+ dip->logical_offset,
+ dip->bytes,
+ !bio->bi_error);
kfree(dip);
@@ -8346,6 +8346,21 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
dip->subio_endio = btrfs_subio_endio_read;
}
+ /*
+ * Reset the range for unsubmitted ordered extents (to a 0 length range)
+ * even if we fail to submit a bio, because in such a case we do the
+ * corresponding error handling below and it must not be done a second
+ * time by btrfs_direct_IO().
+ */
+ if (write) {
+ struct btrfs_dio_data *dio_data = current->journal_info;
+
+ dio_data->unsubmitted_oe_range_end = dip->logical_offset +
+ dip->bytes;
+ dio_data->unsubmitted_oe_range_start =
+ dio_data->unsubmitted_oe_range_end;
+ }
+
ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
if (!ret)
return;
@@ -8374,24 +8389,15 @@ free_ordered:
dip = NULL;
io_bio = NULL;
} else {
- if (write) {
- struct btrfs_ordered_extent *ordered;
-
- ordered = btrfs_lookup_ordered_extent(inode,
- file_offset);
- set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
- /*
- * Decrements our ref on the ordered extent and removes
- * the ordered extent from the inode's ordered tree,
- * doing all the proper resource cleanup such as for the
- * reserved space and waking up any waiters for this
- * ordered extent (through btrfs_remove_ordered_extent).
- */
- btrfs_finish_ordered_io(ordered);
- } else {
+ if (write)
+ btrfs_endio_direct_write_update_ordered(inode,
+ file_offset,
+ dio_bio->bi_iter.bi_size,
+ 0);
+ else
unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
file_offset + dio_bio->bi_iter.bi_size - 1);
- }
+
dio_bio->bi_error = -EIO;
/*
* Releases and cleans up our dio_bio, no need to bio_put()
@@ -8475,7 +8481,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
* not unlock the i_mutex at this case.
*/
if (offset + count <= inode->i_size) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
relock = true;
}
ret = btrfs_delalloc_reserve_space(inode, offset, count);
@@ -8491,6 +8497,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
* originally calculated. Abuse current->journal_info for this.
*/
dio_data.reserve = round_up(count, root->sectorsize);
+ dio_data.unsubmitted_oe_range_start = (u64)offset;
+ dio_data.unsubmitted_oe_range_end = (u64)offset;
current->journal_info = &dio_data;
} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
&BTRFS_I(inode)->runtime_flags)) {
@@ -8509,6 +8517,19 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
if (dio_data.reserve)
btrfs_delalloc_release_space(inode, offset,
dio_data.reserve);
+ /*
+ * On error we might have left some ordered extents
+ * without submitting corresponding bios for them, so
+ * clean them up to avoid other tasks getting them
+ * and waiting for them to complete forever.
+ */
+ if (dio_data.unsubmitted_oe_range_start <
+ dio_data.unsubmitted_oe_range_end)
+ btrfs_endio_direct_write_update_ordered(inode,
+ dio_data.unsubmitted_oe_range_start,
+ dio_data.unsubmitted_oe_range_end -
+ dio_data.unsubmitted_oe_range_start,
+ 0);
} else if (ret >= 0 && (size_t)ret < count)
btrfs_delalloc_release_space(inode, offset,
count - (size_t)ret);
@@ -8517,7 +8538,7 @@ out:
if (wakeup)
inode_dio_end(inode);
if (relock)
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
return ret;
}
@@ -8639,7 +8660,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
}
if (!inode_evicting)
- lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
+ lock_extent_bits(tree, page_start, page_end, &cached_state);
ordered = btrfs_lookup_ordered_extent(inode, page_start);
if (ordered) {
/*
@@ -8677,7 +8698,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
btrfs_put_ordered_extent(ordered);
if (!inode_evicting) {
cached_state = NULL;
- lock_extent_bits(tree, page_start, page_end, 0,
+ lock_extent_bits(tree, page_start, page_end,
&cached_state);
}
}
@@ -8775,7 +8796,7 @@ again:
}
wait_on_page_writeback(page);
- lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state);
+ lock_extent_bits(io_tree, page_start, page_end, &cached_state);
set_page_extent_mapped(page);
/*
@@ -9049,6 +9070,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->dir_index = 0;
ei->last_unlink_trans = 0;
ei->last_log_commit = 0;
+ ei->delayed_iput_count = 0;
spin_lock_init(&ei->lock);
ei->outstanding_extents = 0;
@@ -9073,6 +9095,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
mutex_init(&ei->delalloc_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);
INIT_LIST_HEAD(&ei->delalloc_inodes);
+ INIT_LIST_HEAD(&ei->delayed_iput);
RB_CLEAR_NODE(&ei->rb_node);
return inode;
@@ -9177,15 +9200,14 @@ void btrfs_destroy_cachep(void)
kmem_cache_destroy(btrfs_path_cachep);
if (btrfs_free_space_cachep)
kmem_cache_destroy(btrfs_free_space_cachep);
- if (btrfs_delalloc_work_cachep)
- kmem_cache_destroy(btrfs_delalloc_work_cachep);
}
int btrfs_init_cachep(void)
{
btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
sizeof(struct btrfs_inode), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once);
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT,
+ init_once);
if (!btrfs_inode_cachep)
goto fail;
@@ -9213,13 +9235,6 @@ int btrfs_init_cachep(void)
if (!btrfs_free_space_cachep)
goto fail;
- btrfs_delalloc_work_cachep = kmem_cache_create("btrfs_delalloc_work",
- sizeof(struct btrfs_delalloc_work), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
- NULL);
- if (!btrfs_delalloc_work_cachep)
- goto fail;
-
return 0;
fail:
btrfs_destroy_cachep();
@@ -9443,14 +9458,10 @@ static void btrfs_run_delalloc_work(struct btrfs_work *work)
delalloc_work = container_of(work, struct btrfs_delalloc_work,
work);
inode = delalloc_work->inode;
- if (delalloc_work->wait) {
- btrfs_wait_ordered_range(inode, 0, (u64)-1);
- } else {
+ filemap_flush(inode->i_mapping);
+ if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+ &BTRFS_I(inode)->runtime_flags))
filemap_flush(inode->i_mapping);
- if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
- &BTRFS_I(inode)->runtime_flags))
- filemap_flush(inode->i_mapping);
- }
if (delalloc_work->delay_iput)
btrfs_add_delayed_iput(inode);
@@ -9460,18 +9471,17 @@ static void btrfs_run_delalloc_work(struct btrfs_work *work)
}
struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
- int wait, int delay_iput)
+ int delay_iput)
{
struct btrfs_delalloc_work *work;
- work = kmem_cache_zalloc(btrfs_delalloc_work_cachep, GFP_NOFS);
+ work = kmalloc(sizeof(*work), GFP_NOFS);
if (!work)
return NULL;
init_completion(&work->completion);
INIT_LIST_HEAD(&work->list);
work->inode = inode;
- work->wait = wait;
work->delay_iput = delay_iput;
WARN_ON_ONCE(!inode);
btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
@@ -9483,7 +9493,7 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
{
wait_for_completion(&work->completion);
- kmem_cache_free(btrfs_delalloc_work_cachep, work);
+ kfree(work);
}
/*
@@ -9519,7 +9529,7 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput,
}
spin_unlock(&root->delalloc_lock);
- work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
+ work = btrfs_alloc_delalloc_work(inode, delay_iput);
if (!work) {
if (delay_iput)
btrfs_add_delayed_iput(inode);
@@ -9696,10 +9706,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
if (err)
goto out_unlock_inode;
- err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
- if (err)
- goto out_unlock_inode;
-
path = btrfs_alloc_path();
if (!path) {
err = -ENOMEM;
@@ -9732,10 +9738,18 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
btrfs_free_path(path);
inode->i_op = &btrfs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &btrfs_symlink_aops;
inode_set_bytes(inode, name_len);
btrfs_i_size_write(inode, name_len);
err = btrfs_update_inode(trans, root, inode);
+ /*
+ * Last step, add directory indexes for our symlink inode. This is the
+ * last step to avoid extra cleanup of these indexes if an error happens
+ * elsewhere above.
+ */
+ if (!err)
+ err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
if (err) {
drop_inode = 1;
goto out_unlock_inode;
@@ -9786,7 +9800,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
}
}
- cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
+ cur_bytes = min_t(u64, num_bytes, SZ_256M);
cur_bytes = max(cur_bytes, min_size);
/*
* If we are severely fragmented we could end up with really
@@ -10021,7 +10035,7 @@ static const struct inode_operations btrfs_dir_inode_operations = {
.setattr = btrfs_setattr,
.mknod = btrfs_mknod,
.setxattr = btrfs_setxattr,
- .getxattr = btrfs_getxattr,
+ .getxattr = generic_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
.permission = btrfs_permission,
@@ -10050,7 +10064,7 @@ static const struct file_operations btrfs_dir_file_operations = {
.fsync = btrfs_sync_file,
};
-static struct extent_io_ops btrfs_extent_io_ops = {
+static const struct extent_io_ops btrfs_extent_io_ops = {
.fill_delalloc = run_delalloc_range,
.submit_bio_hook = btrfs_submit_bio_hook,
.merge_bio_hook = btrfs_merge_bio_hook,
@@ -10098,7 +10112,7 @@ static const struct inode_operations btrfs_file_inode_operations = {
.getattr = btrfs_getattr,
.setattr = btrfs_setattr,
.setxattr = btrfs_setxattr,
- .getxattr = btrfs_getxattr,
+ .getxattr = generic_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
.permission = btrfs_permission,
@@ -10112,7 +10126,7 @@ static const struct inode_operations btrfs_special_inode_operations = {
.setattr = btrfs_setattr,
.permission = btrfs_permission,
.setxattr = btrfs_setxattr,
- .getxattr = btrfs_getxattr,
+ .getxattr = generic_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
.get_acl = btrfs_get_acl,
@@ -10121,13 +10135,12 @@ static const struct inode_operations btrfs_special_inode_operations = {
};
static const struct inode_operations btrfs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.getattr = btrfs_getattr,
.setattr = btrfs_setattr,
.permission = btrfs_permission,
.setxattr = btrfs_setxattr,
- .getxattr = btrfs_getxattr,
+ .getxattr = generic_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
.update_time = btrfs_update_time,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index f07d01bc4..48aee9846 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -240,7 +240,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ip_oldflags = ip->flags;
i_oldflags = inode->i_flags;
@@ -358,7 +358,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
}
out_unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
mnt_drop_write_file(file);
return ret;
}
@@ -659,22 +659,28 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
return -EINVAL;
+ pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
+ if (!pending_snapshot)
+ return -ENOMEM;
+
+ pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item),
+ GFP_NOFS);
+ pending_snapshot->path = btrfs_alloc_path();
+ if (!pending_snapshot->root_item || !pending_snapshot->path) {
+ ret = -ENOMEM;
+ goto free_pending;
+ }
+
atomic_inc(&root->will_be_snapshoted);
smp_mb__after_atomic();
btrfs_wait_for_no_snapshoting_writes(root);
ret = btrfs_start_delalloc_inodes(root, 0);
if (ret)
- goto out;
+ goto dec_and_free;
btrfs_wait_ordered_extents(root, -1);
- pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
- if (!pending_snapshot) {
- ret = -ENOMEM;
- goto out;
- }
-
btrfs_init_block_rsv(&pending_snapshot->block_rsv,
BTRFS_BLOCK_RSV_TEMP);
/*
@@ -690,7 +696,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
&pending_snapshot->qgroup_reserved,
false);
if (ret)
- goto free;
+ goto dec_and_free;
pending_snapshot->dentry = dentry;
pending_snapshot->root = root;
@@ -741,11 +747,14 @@ fail:
btrfs_subvolume_release_metadata(BTRFS_I(dir)->root,
&pending_snapshot->block_rsv,
pending_snapshot->qgroup_reserved);
-free:
- kfree(pending_snapshot);
-out:
+dec_and_free:
if (atomic_dec_and_test(&root->will_be_snapshoted))
wake_up_atomic_t(&root->will_be_snapshoted);
+free_pending:
+ kfree(pending_snapshot->root_item);
+ btrfs_free_path(pending_snapshot->path);
+ kfree(pending_snapshot);
+
return ret;
}
@@ -872,7 +881,7 @@ out_up_read:
out_dput:
dput(dentry);
out_unlock:
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
return error;
}
@@ -996,7 +1005,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
u64 end = start + len - 1;
/* get the big lock and read metadata off disk */
- lock_extent_bits(io_tree, start, end, 0, &cached);
+ lock_extent_bits(io_tree, start, end, &cached);
em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
unlock_extent_cached(io_tree, start, end, &cached, GFP_NOFS);
@@ -1020,7 +1029,7 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
ret = false;
else if ((em->block_start + em->block_len == next->block_start) &&
- (em->block_len > 128 * 1024 && next->block_len > 128 * 1024))
+ (em->block_len > SZ_128K && next->block_len > SZ_128K))
ret = false;
free_extent_map(next);
@@ -1144,7 +1153,7 @@ again:
page_end = page_start + PAGE_CACHE_SIZE - 1;
while (1) {
lock_extent_bits(tree, page_start, page_end,
- 0, &cached_state);
+ &cached_state);
ordered = btrfs_lookup_ordered_extent(inode,
page_start);
unlock_extent_cached(tree, page_start, page_end,
@@ -1204,7 +1213,7 @@ again:
page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE;
lock_extent_bits(&BTRFS_I(inode)->io_tree,
- page_start, page_end - 1, 0, &cached_state);
+ page_start, page_end - 1, &cached_state);
clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0,
@@ -1266,9 +1275,9 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
int defrag_count = 0;
int compress_type = BTRFS_COMPRESS_ZLIB;
u32 extent_thresh = range->extent_thresh;
- unsigned long max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT;
+ unsigned long max_cluster = SZ_256K >> PAGE_CACHE_SHIFT;
unsigned long cluster = max_cluster;
- u64 new_align = ~((u64)128 * 1024 - 1);
+ u64 new_align = ~((u64)SZ_128K - 1);
struct page **pages = NULL;
if (isize == 0)
@@ -1285,7 +1294,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
}
if (extent_thresh == 0)
- extent_thresh = 256 * 1024;
+ extent_thresh = SZ_256K;
/*
* if we were not given a file, allocate a readahead
@@ -1317,7 +1326,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
if (newer_than) {
ret = find_new_extents(root, inode, newer_than,
- &newer_off, 64 * 1024);
+ &newer_off, SZ_64K);
if (!ret) {
range->start = newer_off;
/*
@@ -1384,18 +1393,18 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
ra_index += cluster;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
BTRFS_I(inode)->force_compress = compress_type;
ret = cluster_pages_for_defrag(inode, pages, i, cluster);
if (ret < 0) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
goto out_ra;
}
defrag_count += ret;
balance_dirty_pages_ratelimited(inode->i_mapping);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (newer_than) {
if (newer_off == (u64)-1)
@@ -1407,9 +1416,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
newer_off = max(newer_off + 1,
(u64)i << PAGE_CACHE_SHIFT);
- ret = find_new_extents(root, inode,
- newer_than, &newer_off,
- 64 * 1024);
+ ret = find_new_extents(root, inode, newer_than,
+ &newer_off, SZ_64K);
if (!ret) {
range->start = newer_off;
i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
@@ -1457,9 +1465,9 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
out_ra:
if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
if (!file)
kfree(ra);
@@ -1575,7 +1583,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
new_size = old_size + new_size;
}
- if (new_size < 256 * 1024 * 1024) {
+ if (new_size < SZ_256M) {
ret = -EINVAL;
goto out_free;
}
@@ -2164,7 +2172,7 @@ static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
struct inode *inode;
int ret;
size_t buf_size;
- const size_t buf_limit = 16 * 1024 * 1024;
+ const size_t buf_limit = SZ_16M;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -2422,7 +2430,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
goto out_dput;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* Don't allow to delete a subvolume with send in progress. This is
@@ -2535,7 +2543,7 @@ out_up_write:
spin_unlock(&dest->root_item_lock);
}
out_unlock_inode:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (!err) {
d_invalidate(dentry);
btrfs_invalidate_inodes(dest);
@@ -2551,7 +2559,7 @@ out_unlock_inode:
out_dput:
dput(dentry);
out_unlock_dir:
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
out_drop_write:
mnt_drop_write_file(file);
out:
@@ -2871,8 +2879,8 @@ static int lock_extent_range(struct inode *inode, u64 off, u64 len,
static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
{
- mutex_unlock(&inode1->i_mutex);
- mutex_unlock(&inode2->i_mutex);
+ inode_unlock(inode1);
+ inode_unlock(inode2);
}
static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
@@ -2880,8 +2888,8 @@ static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
if (inode1 < inode2)
swap(inode1, inode2);
- mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(inode1, I_MUTEX_PARENT);
+ inode_lock_nested(inode2, I_MUTEX_CHILD);
}
static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
@@ -3003,7 +3011,7 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
flush_dcache_page(dst_page);
if (memcmp(addr, dst_addr, cmp_len))
- ret = BTRFS_SAME_DATA_DIFFERS;
+ ret = -EBADE;
kunmap_atomic(addr);
kunmap_atomic(dst_addr);
@@ -3055,7 +3063,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
return 0;
if (same_inode) {
- mutex_lock(&src->i_mutex);
+ inode_lock(src);
ret = extent_same_check_offsets(src, loff, &len, olen);
if (ret)
@@ -3162,62 +3170,25 @@ again:
btrfs_cmp_data_free(&cmp);
out_unlock:
if (same_inode)
- mutex_unlock(&src->i_mutex);
+ inode_unlock(src);
else
btrfs_double_inode_unlock(src, dst);
return ret;
}
-#define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024)
+#define BTRFS_MAX_DEDUPE_LEN SZ_16M
-static long btrfs_ioctl_file_extent_same(struct file *file,
- struct btrfs_ioctl_same_args __user *argp)
+ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
+ struct file *dst_file, u64 dst_loff)
{
- struct btrfs_ioctl_same_args *same = NULL;
- struct btrfs_ioctl_same_extent_info *info;
- struct inode *src = file_inode(file);
- u64 off;
- u64 len;
- int i;
- int ret;
- unsigned long size;
+ struct inode *src = file_inode(src_file);
+ struct inode *dst = file_inode(dst_file);
u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
- bool is_admin = capable(CAP_SYS_ADMIN);
- u16 count;
-
- if (!(file->f_mode & FMODE_READ))
- return -EINVAL;
+ ssize_t res;
- ret = mnt_want_write_file(file);
- if (ret)
- return ret;
-
- if (get_user(count, &argp->dest_count)) {
- ret = -EFAULT;
- goto out;
- }
-
- size = offsetof(struct btrfs_ioctl_same_args __user, info[count]);
-
- same = memdup_user(argp, size);
-
- if (IS_ERR(same)) {
- ret = PTR_ERR(same);
- same = NULL;
- goto out;
- }
-
- off = same->logical_offset;
- len = same->length;
-
- /*
- * Limit the total length we will dedupe for each operation.
- * This is intended to bound the total time spent in this
- * ioctl to something sane.
- */
- if (len > BTRFS_MAX_DEDUPE_LEN)
- len = BTRFS_MAX_DEDUPE_LEN;
+ if (olen > BTRFS_MAX_DEDUPE_LEN)
+ olen = BTRFS_MAX_DEDUPE_LEN;
if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) {
/*
@@ -3225,58 +3196,13 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
* result, btrfs_cmp_data() won't correctly handle
* this situation without an update.
*/
- ret = -EINVAL;
- goto out;
- }
-
- ret = -EISDIR;
- if (S_ISDIR(src->i_mode))
- goto out;
-
- ret = -EACCES;
- if (!S_ISREG(src->i_mode))
- goto out;
-
- /* pre-format output fields to sane values */
- for (i = 0; i < count; i++) {
- same->info[i].bytes_deduped = 0ULL;
- same->info[i].status = 0;
- }
-
- for (i = 0, info = same->info; i < count; i++, info++) {
- struct inode *dst;
- struct fd dst_file = fdget(info->fd);
- if (!dst_file.file) {
- info->status = -EBADF;
- continue;
- }
- dst = file_inode(dst_file.file);
-
- if (!(is_admin || (dst_file.file->f_mode & FMODE_WRITE))) {
- info->status = -EINVAL;
- } else if (file->f_path.mnt != dst_file.file->f_path.mnt) {
- info->status = -EXDEV;
- } else if (S_ISDIR(dst->i_mode)) {
- info->status = -EISDIR;
- } else if (!S_ISREG(dst->i_mode)) {
- info->status = -EACCES;
- } else {
- info->status = btrfs_extent_same(src, off, len, dst,
- info->logical_offset);
- if (info->status == 0)
- info->bytes_deduped += len;
- }
- fdput(dst_file);
+ return -EINVAL;
}
- ret = copy_to_user(argp, same, size);
- if (ret)
- ret = -EFAULT;
-
-out:
- mnt_drop_write_file(file);
- kfree(same);
- return ret;
+ res = btrfs_extent_same(src, loff, olen, dst, dst_loff);
+ if (res)
+ return res;
+ return olen;
}
static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
@@ -3551,7 +3477,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
return ret;
}
- path->reada = 2;
+ path->reada = READA_FORWARD;
/* clone data */
key.objectid = btrfs_ino(src);
key.type = BTRFS_EXTENT_DATA_KEY;
@@ -3852,17 +3778,16 @@ out:
return ret;
}
-static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
- u64 off, u64 olen, u64 destoff)
+static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
+ u64 off, u64 olen, u64 destoff)
{
struct inode *inode = file_inode(file);
+ struct inode *src = file_inode(file_src);
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct fd src_file;
- struct inode *src;
int ret;
u64 len = olen;
u64 bs = root->fs_info->sb->s_blocksize;
- int same_inode = 0;
+ int same_inode = src == inode;
/*
* TODO:
@@ -3875,54 +3800,25 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
* be either compressed or non-compressed.
*/
- /* the destination must be opened for writing */
- if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
- return -EINVAL;
-
if (btrfs_root_readonly(root))
return -EROFS;
- ret = mnt_want_write_file(file);
- if (ret)
- return ret;
-
- src_file = fdget(srcfd);
- if (!src_file.file) {
- ret = -EBADF;
- goto out_drop_write;
- }
-
- ret = -EXDEV;
- if (src_file.file->f_path.mnt != file->f_path.mnt)
- goto out_fput;
-
- src = file_inode(src_file.file);
-
- ret = -EINVAL;
- if (src == inode)
- same_inode = 1;
-
- /* the src must be open for reading */
- if (!(src_file.file->f_mode & FMODE_READ))
- goto out_fput;
+ if (file_src->f_path.mnt != file->f_path.mnt ||
+ src->i_sb != inode->i_sb)
+ return -EXDEV;
/* don't make the dst file partly checksummed */
if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
- goto out_fput;
+ return -EINVAL;
- ret = -EISDIR;
if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
- goto out_fput;
-
- ret = -EXDEV;
- if (src->i_sb != inode->i_sb)
- goto out_fput;
+ return -EISDIR;
if (!same_inode) {
btrfs_double_inode_lock(src, inode);
} else {
- mutex_lock(&src->i_mutex);
+ inode_lock(src);
}
/* determine range to clone */
@@ -3999,22 +3895,26 @@ out_unlock:
if (!same_inode)
btrfs_double_inode_unlock(src, inode);
else
- mutex_unlock(&src->i_mutex);
-out_fput:
- fdput(src_file);
-out_drop_write:
- mnt_drop_write_file(file);
+ inode_unlock(src);
return ret;
}
-static long btrfs_ioctl_clone_range(struct file *file, void __user *argp)
+ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t len, unsigned int flags)
{
- struct btrfs_ioctl_clone_range_args args;
+ ssize_t ret;
- if (copy_from_user(&args, argp, sizeof(args)))
- return -EFAULT;
- return btrfs_ioctl_clone(file, args.src_fd, args.src_offset,
- args.src_length, args.dest_offset);
+ ret = btrfs_clone_files(file_out, file_in, pos_in, len, pos_out);
+ if (ret == 0)
+ ret = len;
+ return ret;
+}
+
+int btrfs_clone_file_range(struct file *src_file, loff_t off,
+ struct file *dst_file, loff_t destoff, u64 len)
+{
+ return btrfs_clone_files(dst_file, src_file, off, len, destoff);
}
/*
@@ -4226,7 +4126,7 @@ static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
return -ENOMEM;
space_args.total_spaces = 0;
- dest = kmalloc(alloc_size, GFP_NOFS);
+ dest = kmalloc(alloc_size, GFP_KERNEL);
if (!dest)
return -ENOMEM;
dest_orig = dest;
@@ -4603,7 +4503,7 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
goto out;
}
- size = min_t(u32, loi->size, 64 * 1024);
+ size = min_t(u32, loi->size, SZ_64K);
inodes = init_data_container(size);
if (IS_ERR(inodes)) {
ret = PTR_ERR(inodes);
@@ -4752,7 +4652,7 @@ locked:
goto out_bargs;
}
- bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
+ bctl = kzalloc(sizeof(*bctl), GFP_KERNEL);
if (!bctl) {
ret = -ENOMEM;
goto out_bargs;
@@ -4838,7 +4738,7 @@ static long btrfs_ioctl_balance_progress(struct btrfs_root *root,
goto out;
}
- bargs = kzalloc(sizeof(*bargs), GFP_NOFS);
+ bargs = kzalloc(sizeof(*bargs), GFP_KERNEL);
if (!bargs) {
ret = -ENOMEM;
goto out;
@@ -5098,7 +4998,7 @@ static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- qsa = kzalloc(sizeof(*qsa), GFP_NOFS);
+ qsa = kzalloc(sizeof(*qsa), GFP_KERNEL);
if (!qsa)
return -ENOMEM;
@@ -5228,7 +5128,7 @@ static long btrfs_ioctl_set_received_subvol_32(struct file *file,
goto out;
}
- args64 = kmalloc(sizeof(*args64), GFP_NOFS);
+ args64 = kmalloc(sizeof(*args64), GFP_KERNEL);
if (!args64) {
ret = -ENOMEM;
goto out;
@@ -5365,7 +5265,7 @@ out_unlock:
static int btrfs_ioctl_get_supported_features(struct file *file,
void __user *arg)
{
- static struct btrfs_ioctl_feature_flags features[3] = {
+ static const struct btrfs_ioctl_feature_flags features[3] = {
INIT_FEATURE_FLAGS(SUPP),
INIT_FEATURE_FLAGS(SAFE_SET),
INIT_FEATURE_FLAGS(SAFE_CLEAR)
@@ -5564,10 +5464,6 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_dev_info(root, argp);
case BTRFS_IOC_BALANCE:
return btrfs_ioctl_balance(file, NULL);
- case BTRFS_IOC_CLONE:
- return btrfs_ioctl_clone(file, arg, 0, 0, 0);
- case BTRFS_IOC_CLONE_RANGE:
- return btrfs_ioctl_clone_range(file, argp);
case BTRFS_IOC_TRANS_START:
return btrfs_ioctl_trans_start(file);
case BTRFS_IOC_TRANS_END:
@@ -5645,8 +5541,6 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_get_fslabel(file, argp);
case BTRFS_IOC_SET_FSLABEL:
return btrfs_ioctl_set_fslabel(file, argp);
- case BTRFS_IOC_FILE_EXTENT_SAME:
- return btrfs_ioctl_file_extent_same(file, argp);
case BTRFS_IOC_GET_SUPPORTED_FEATURES:
return btrfs_ioctl_get_supported_features(file, argp);
case BTRFS_IOC_GET_FEATURES:
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 8077461fc..d13128c70 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -56,7 +56,6 @@ void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
atomic_dec(&eb->spinning_readers);
read_unlock(&eb->lock);
}
- return;
}
/*
@@ -96,7 +95,6 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
waitqueue_active(&eb->read_lock_wq))
wake_up(&eb->read_lock_wq);
}
- return;
}
/*
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 1a33d3eb3..55161369f 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -503,7 +503,6 @@ static void cache_rbio(struct btrfs_raid_bio *rbio)
}
spin_unlock_irqrestore(&table->cache_lock, flags);
- return;
}
/*
@@ -610,13 +609,28 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
return 1;
}
+static int rbio_stripe_page_index(struct btrfs_raid_bio *rbio, int stripe,
+ int index)
+{
+ return stripe * rbio->stripe_npages + index;
+}
+
+/*
+ * these are just the pages from the rbio array, not from anything
+ * the FS sent down to us
+ */
+static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe,
+ int index)
+{
+ return rbio->stripe_pages[rbio_stripe_page_index(rbio, stripe, index)];
+}
+
/*
* helper to index into the pstripe
*/
static struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index)
{
- index += (rbio->nr_data * rbio->stripe_len) >> PAGE_CACHE_SHIFT;
- return rbio->stripe_pages[index];
+ return rbio_stripe_page(rbio, rbio->nr_data, index);
}
/*
@@ -627,10 +641,7 @@ static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index)
{
if (rbio->nr_data + 1 == rbio->real_stripes)
return NULL;
-
- index += ((rbio->nr_data + 1) * rbio->stripe_len) >>
- PAGE_CACHE_SHIFT;
- return rbio->stripe_pages[index];
+ return rbio_stripe_page(rbio, rbio->nr_data + 1, index);
}
/*
@@ -890,6 +901,7 @@ static void raid_write_end_io(struct bio *bio)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
int err = bio->bi_error;
+ int max_errors;
if (err)
fail_bio_stripe(rbio, bio);
@@ -902,11 +914,12 @@ static void raid_write_end_io(struct bio *bio)
err = 0;
/* OK, we have read all the stripes we need to. */
- if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
+ max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
+ 0 : rbio->bbio->max_errors;
+ if (atomic_read(&rbio->error) > max_errors)
err = -EIO;
rbio_orig_end_io(rbio, err);
- return;
}
/*
@@ -949,8 +962,7 @@ static struct page *page_in_rbio(struct btrfs_raid_bio *rbio,
*/
static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
{
- unsigned long nr = stripe_len * nr_stripes;
- return DIV_ROUND_UP(nr, PAGE_CACHE_SIZE);
+ return DIV_ROUND_UP(stripe_len, PAGE_CACHE_SIZE) * nr_stripes;
}
/*
@@ -968,8 +980,8 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
void *p;
rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2 +
- DIV_ROUND_UP(stripe_npages, BITS_PER_LONG / 8),
- GFP_NOFS);
+ DIV_ROUND_UP(stripe_npages, BITS_PER_LONG) *
+ sizeof(long), GFP_NOFS);
if (!rbio)
return ERR_PTR(-ENOMEM);
@@ -1023,18 +1035,17 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
if (!page)
return -ENOMEM;
rbio->stripe_pages[i] = page;
- ClearPageUptodate(page);
}
return 0;
}
-/* allocate pages for just the p/q stripes */
+/* only allocate pages for p/q stripes */
static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
{
int i;
struct page *page;
- i = (rbio->nr_data * rbio->stripe_len) >> PAGE_CACHE_SHIFT;
+ i = rbio_stripe_page_index(rbio, rbio->nr_data, 0);
for (; i < rbio->nr_pages; i++) {
if (rbio->stripe_pages[i])
@@ -1123,18 +1134,6 @@ static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
}
/*
- * these are just the pages from the rbio array, not from anything
- * the FS sent down to us
- */
-static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe, int page)
-{
- int index;
- index = stripe * (rbio->stripe_len >> PAGE_CACHE_SHIFT);
- index += page;
- return rbio->stripe_pages[index];
-}
-
-/*
* helper function to walk our bio list and populate the bio_pages array with
* the result. This seems expensive, but it is faster than constantly
* searching through the bio list as we setup the IO in finish_rmw or stripe
@@ -1177,7 +1176,6 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
{
struct btrfs_bio *bbio = rbio->bbio;
void *pointers[rbio->real_stripes];
- int stripe_len = rbio->stripe_len;
int nr_data = rbio->nr_data;
int stripe;
int pagenr;
@@ -1185,7 +1183,6 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
int q_stripe = -1;
struct bio_list bio_list;
struct bio *bio;
- int pages_per_stripe = stripe_len >> PAGE_CACHE_SHIFT;
int ret;
bio_list_init(&bio_list);
@@ -1228,7 +1225,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
else
clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
- for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
struct page *p;
/* first collect one page from each data stripe */
for (stripe = 0; stripe < nr_data; stripe++) {
@@ -1270,7 +1267,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
* everything else.
*/
for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
- for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
struct page *page;
if (stripe < rbio->nr_data) {
page = page_in_rbio(rbio, stripe, pagenr, 1);
@@ -1294,7 +1291,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
if (!bbio->tgtdev_map[stripe])
continue;
- for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
struct page *page;
if (stripe < rbio->nr_data) {
page = page_in_rbio(rbio, stripe, pagenr, 1);
@@ -1508,7 +1505,6 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
int bios_to_read = 0;
struct bio_list bio_list;
int ret;
- int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
int pagenr;
int stripe;
struct bio *bio;
@@ -1527,7 +1523,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
* stripe
*/
for (stripe = 0; stripe < rbio->nr_data; stripe++) {
- for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
struct page *page;
/*
* we want to find all the pages missing from
@@ -1803,7 +1799,6 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
int pagenr, stripe;
void **pointers;
int faila = -1, failb = -1;
- int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
struct page *page;
int err;
int i;
@@ -1826,7 +1821,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
index_rbio_pages(rbio);
- for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
/*
* Now we just use bitmap to mark the horizontal stripes in
* which we have data when doing parity scrub.
@@ -1937,7 +1932,7 @@ pstripe:
* other endio functions will fiddle the uptodate bits
*/
if (rbio->operation == BTRFS_RBIO_WRITE) {
- for (i = 0; i < nr_pages; i++) {
+ for (i = 0; i < rbio->stripe_npages; i++) {
if (faila != -1) {
page = rbio_stripe_page(rbio, faila, i);
SetPageUptodate(page);
@@ -2033,7 +2028,6 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
int bios_to_read = 0;
struct bio_list bio_list;
int ret;
- int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
int pagenr;
int stripe;
struct bio *bio;
@@ -2057,7 +2051,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
continue;
}
- for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
struct page *p;
/*
@@ -2281,37 +2275,11 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
if (!page)
return -ENOMEM;
rbio->stripe_pages[index] = page;
- ClearPageUptodate(page);
}
}
return 0;
}
-/*
- * end io function used by finish_rmw. When we finally
- * get here, we've written a full stripe
- */
-static void raid_write_parity_end_io(struct bio *bio)
-{
- struct btrfs_raid_bio *rbio = bio->bi_private;
- int err = bio->bi_error;
-
- if (bio->bi_error)
- fail_bio_stripe(rbio, bio);
-
- bio_put(bio);
-
- if (!atomic_dec_and_test(&rbio->stripes_pending))
- return;
-
- err = 0;
-
- if (atomic_read(&rbio->error))
- err = -EIO;
-
- rbio_orig_end_io(rbio, err);
-}
-
static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
int need_check)
{
@@ -2464,7 +2432,7 @@ submit_write:
break;
bio->bi_private = rbio;
- bio->bi_end_io = raid_write_parity_end_io;
+ bio->bi_end_io = raid_write_end_io;
submit_bio(WRITE, bio);
}
return;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b4ca5454e..2bd001145 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -575,7 +575,8 @@ static int is_cowonly_root(u64 root_objectid)
root_objectid == BTRFS_TREE_LOG_OBJECTID ||
root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
root_objectid == BTRFS_UUID_TREE_OBJECTID ||
- root_objectid == BTRFS_QUOTA_TREE_OBJECTID)
+ root_objectid == BTRFS_QUOTA_TREE_OBJECTID ||
+ root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
return 1;
return 0;
}
@@ -708,8 +709,8 @@ struct backref_node *build_backref_tree(struct reloc_control *rc,
err = -ENOMEM;
goto out;
}
- path1->reada = 1;
- path2->reada = 2;
+ path1->reada = READA_FORWARD;
+ path2->reada = READA_FORWARD;
node = alloc_backref_node(cache);
if (!node) {
@@ -2130,7 +2131,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- path->reada = 1;
+ path->reada = READA_FORWARD;
reloc_root = root->reloc_root;
root_item = &reloc_root->root_item;
@@ -3030,7 +3031,7 @@ int prealloc_file_extent_cluster(struct inode *inode,
int ret = 0;
BUG_ON(cluster->start != cluster->boundary[0]);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = btrfs_check_data_free_space(inode, cluster->start,
cluster->end + 1 - cluster->start);
@@ -3057,7 +3058,7 @@ int prealloc_file_extent_cluster(struct inode *inode,
btrfs_free_reserved_data_space(inode, cluster->start,
cluster->end + 1 - cluster->start);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
@@ -3527,7 +3528,7 @@ static int find_data_references(struct reloc_control *rc,
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- path->reada = 1;
+ path->reada = READA_FORWARD;
root = read_fs_root(rc->extent_root->fs_info, ref_root);
if (IS_ERR(root)) {
@@ -3917,7 +3918,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- path->reada = 1;
+ path->reada = READA_FORWARD;
ret = prepare_to_relocate(rc);
if (ret) {
@@ -4343,7 +4344,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- path->reada = -1;
+ path->reada = READA_BACK;
key.objectid = BTRFS_TREE_RELOC_OBJECTID;
key.type = BTRFS_ROOT_ITEM_KEY;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index b091d94ce..92bf5ee73 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1514,8 +1514,6 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
if (sblock->no_io_error_seen)
scrub_recheck_block_checksum(sblock);
-
- return;
}
static inline int scrub_check_fsid(u8 fsid[],
@@ -2815,7 +2813,7 @@ out:
static inline int scrub_calc_parity_bitmap_len(int nsectors)
{
- return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * (BITS_PER_LONG / 8);
+ return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * sizeof(long);
}
static void scrub_parity_get(struct scrub_parity *sparity)
@@ -3460,7 +3458,7 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
return ret;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (em->start != chunk_offset)
goto out;
@@ -3507,7 +3505,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
if (!path)
return -ENOMEM;
- path->reada = 2;
+ path->reada = READA_FORWARD;
path->search_commit_root = 1;
path->skip_locking = 1;
@@ -3735,27 +3733,27 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
if (fs_info->scrub_workers_refcnt == 0) {
if (is_dev_replace)
fs_info->scrub_workers =
- btrfs_alloc_workqueue("btrfs-scrub", flags,
+ btrfs_alloc_workqueue("scrub", flags,
1, 4);
else
fs_info->scrub_workers =
- btrfs_alloc_workqueue("btrfs-scrub", flags,
+ btrfs_alloc_workqueue("scrub", flags,
max_active, 4);
if (!fs_info->scrub_workers)
goto fail_scrub_workers;
fs_info->scrub_wr_completion_workers =
- btrfs_alloc_workqueue("btrfs-scrubwrc", flags,
+ btrfs_alloc_workqueue("scrubwrc", flags,
max_active, 2);
if (!fs_info->scrub_wr_completion_workers)
goto fail_scrub_wr_completion_workers;
fs_info->scrub_nocow_workers =
- btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0);
+ btrfs_alloc_workqueue("scrubnc", flags, 1, 0);
if (!fs_info->scrub_nocow_workers)
goto fail_scrub_nocow_workers;
fs_info->scrub_parity_workers =
- btrfs_alloc_workqueue("btrfs-scrubparity", flags,
+ btrfs_alloc_workqueue("scrubparity", flags,
max_active, 2);
if (!fs_info->scrub_parity_workers)
goto fail_scrub_parity_workers;
@@ -4211,7 +4209,7 @@ static int check_extent_to_block(struct inode *inode, u64 start, u64 len,
io_tree = &BTRFS_I(inode)->io_tree;
- lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
+ lock_extent_bits(io_tree, lockstart, lockend, &cached_state);
ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
if (ordered) {
btrfs_put_ordered_extent(ordered);
@@ -4281,7 +4279,7 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
return PTR_ERR(inode);
/* Avoid truncate/dio/punch hole.. */
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
inode_dio_wait(inode);
physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
@@ -4360,7 +4358,7 @@ next_page:
}
ret = COPY_COMPLETE;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
iput(inode);
return ret;
}
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 48d425aef..02e00166c 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -22,8 +22,8 @@
#define BTRFS_SEND_STREAM_MAGIC "btrfs-stream"
#define BTRFS_SEND_STREAM_VERSION 1
-#define BTRFS_SEND_BUF_SIZE (1024 * 64)
-#define BTRFS_SEND_READ_SIZE (1024 * 48)
+#define BTRFS_SEND_BUF_SIZE SZ_64K
+#define BTRFS_SEND_READ_SIZE (48 * SZ_1K)
enum btrfs_tlv_type {
BTRFS_TLV_U8,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index fe609b81d..d41e09fe8 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -295,10 +295,11 @@ enum {
Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
- Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
- Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache,
- Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
- Opt_check_integrity, Opt_check_integrity_including_extent_data,
+ Opt_space_cache, Opt_space_cache_version, Opt_clear_cache,
+ Opt_user_subvol_rm_allowed, Opt_enospc_debug, Opt_subvolrootid,
+ Opt_defrag, Opt_inode_cache, Opt_no_space_cache, Opt_recovery,
+ Opt_skip_balance, Opt_check_integrity,
+ Opt_check_integrity_including_extent_data,
Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,
Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
@@ -309,7 +310,7 @@ enum {
Opt_err,
};
-static match_table_t tokens = {
+static const match_table_t tokens = {
{Opt_degraded, "degraded"},
{Opt_subvol, "subvol=%s"},
{Opt_subvolid, "subvolid=%s"},
@@ -340,6 +341,7 @@ static match_table_t tokens = {
{Opt_discard, "discard"},
{Opt_nodiscard, "nodiscard"},
{Opt_space_cache, "space_cache"},
+ {Opt_space_cache_version, "space_cache=%s"},
{Opt_clear_cache, "clear_cache"},
{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
{Opt_enospc_debug, "enospc_debug"},
@@ -381,9 +383,14 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
int ret = 0;
char *compress_type;
bool compress_force = false;
+ enum btrfs_compression_type saved_compress_type;
+ bool saved_compress_force;
+ int no_compress = 0;
cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
- if (cache_gen)
+ if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE))
+ btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
+ else if (cache_gen)
btrfs_set_opt(info->mount_opt, SPACE_CACHE);
if (!options)
@@ -458,6 +465,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
/* Fallthrough */
case Opt_compress:
case Opt_compress_type:
+ saved_compress_type = btrfs_test_opt(root, COMPRESS) ?
+ info->compress_type : BTRFS_COMPRESS_NONE;
+ saved_compress_force =
+ btrfs_test_opt(root, FORCE_COMPRESS);
if (token == Opt_compress ||
token == Opt_compress_force ||
strcmp(args[0].from, "zlib") == 0) {
@@ -466,6 +477,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
btrfs_set_opt(info->mount_opt, COMPRESS);
btrfs_clear_opt(info->mount_opt, NODATACOW);
btrfs_clear_opt(info->mount_opt, NODATASUM);
+ no_compress = 0;
} else if (strcmp(args[0].from, "lzo") == 0) {
compress_type = "lzo";
info->compress_type = BTRFS_COMPRESS_LZO;
@@ -473,25 +485,21 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
btrfs_clear_opt(info->mount_opt, NODATACOW);
btrfs_clear_opt(info->mount_opt, NODATASUM);
btrfs_set_fs_incompat(info, COMPRESS_LZO);
+ no_compress = 0;
} else if (strncmp(args[0].from, "no", 2) == 0) {
compress_type = "no";
btrfs_clear_opt(info->mount_opt, COMPRESS);
btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
compress_force = false;
+ no_compress++;
} else {
ret = -EINVAL;
goto out;
}
if (compress_force) {
- btrfs_set_and_info(root, FORCE_COMPRESS,
- "force %s compression",
- compress_type);
+ btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
} else {
- if (!btrfs_test_opt(root, COMPRESS))
- btrfs_info(root->fs_info,
- "btrfs: use %s compression",
- compress_type);
/*
* If we remount from compress-force=xxx to
* compress=xxx, we need clear FORCE_COMPRESS
@@ -500,6 +508,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
*/
btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
}
+ if ((btrfs_test_opt(root, COMPRESS) &&
+ (info->compress_type != saved_compress_type ||
+ compress_force != saved_compress_force)) ||
+ (!btrfs_test_opt(root, COMPRESS) &&
+ no_compress == 1)) {
+ btrfs_info(root->fs_info,
+ "%s %s compression",
+ (compress_force) ? "force" : "use",
+ compress_type);
+ }
+ compress_force = false;
break;
case Opt_ssd:
btrfs_set_and_info(root, SSD,
@@ -617,15 +636,35 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
"turning off discard");
break;
case Opt_space_cache:
- btrfs_set_and_info(root, SPACE_CACHE,
- "enabling disk space caching");
+ case Opt_space_cache_version:
+ if (token == Opt_space_cache ||
+ strcmp(args[0].from, "v1") == 0) {
+ btrfs_clear_opt(root->fs_info->mount_opt,
+ FREE_SPACE_TREE);
+ btrfs_set_and_info(root, SPACE_CACHE,
+ "enabling disk space caching");
+ } else if (strcmp(args[0].from, "v2") == 0) {
+ btrfs_clear_opt(root->fs_info->mount_opt,
+ SPACE_CACHE);
+ btrfs_set_and_info(root, FREE_SPACE_TREE,
+ "enabling free space tree");
+ } else {
+ ret = -EINVAL;
+ goto out;
+ }
break;
case Opt_rescan_uuid_tree:
btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
break;
case Opt_no_space_cache:
- btrfs_clear_and_info(root, SPACE_CACHE,
- "disabling disk space caching");
+ if (btrfs_test_opt(root, SPACE_CACHE)) {
+ btrfs_clear_and_info(root, SPACE_CACHE,
+ "disabling disk space caching");
+ }
+ if (btrfs_test_opt(root, FREE_SPACE_TREE)) {
+ btrfs_clear_and_info(root, FREE_SPACE_TREE,
+ "disabling free space tree");
+ }
break;
case Opt_inode_cache:
btrfs_set_pending_and_info(info, INODE_MAP_CACHE,
@@ -754,8 +793,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
}
}
out:
+ if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE) &&
+ !btrfs_test_opt(root, FREE_SPACE_TREE) &&
+ !btrfs_test_opt(root, CLEAR_CACHE)) {
+ btrfs_err(root->fs_info, "cannot disable free space tree");
+ ret = -EINVAL;
+
+ }
if (!ret && btrfs_test_opt(root, SPACE_CACHE))
btrfs_info(root->fs_info, "disk space caching is enabled");
+ if (!ret && btrfs_test_opt(root, FREE_SPACE_TREE))
+ btrfs_info(root->fs_info, "using free space tree");
kfree(orig);
return ret;
}
@@ -1162,6 +1210,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
seq_puts(seq, ",noacl");
if (btrfs_test_opt(root, SPACE_CACHE))
seq_puts(seq, ",space_cache");
+ else if (btrfs_test_opt(root, FREE_SPACE_TREE))
+ seq_puts(seq, ",space_cache=v2");
else
seq_puts(seq, ",nospace_cache");
if (btrfs_test_opt(root, RESCAN_UUID_TREE))
@@ -1514,9 +1564,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
if ((flags ^ s->s_flags) & MS_RDONLY)
error = -EBUSY;
} else {
- char b[BDEVNAME_SIZE];
-
- strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
+ snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
btrfs_sb(s)->bdev_holder = fs_type;
error = btrfs_fill_super(s, fs_devices, data,
flags & MS_SILENT ? 1 : 0);
@@ -1865,7 +1913,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
* btrfs starts at an offset of at least 1MB when doing chunk
* allocation.
*/
- skip_space = 1024 * 1024;
+ skip_space = SZ_1M;
/* user can set the offset in fs_info->alloc_start. */
if (fs_info->alloc_start &&
@@ -2249,6 +2297,9 @@ static int btrfs_run_sanity_tests(void)
if (ret)
goto out;
ret = btrfs_test_qgroups();
+ if (ret)
+ goto out;
+ ret = btrfs_test_free_space_tree();
out:
btrfs_destroy_test_fs();
return ret;
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index e0ac85949..539e7b5e3 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -202,6 +202,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF);
BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
+BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
static struct attribute *btrfs_supported_feature_attrs[] = {
BTRFS_FEAT_ATTR_PTR(mixed_backref),
@@ -213,6 +214,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
BTRFS_FEAT_ATTR_PTR(raid56),
BTRFS_FEAT_ATTR_PTR(skinny_metadata),
BTRFS_FEAT_ATTR_PTR(no_holes),
+ BTRFS_FEAT_ATTR_PTR(free_space_tree),
NULL
};
@@ -780,6 +782,39 @@ failure:
return error;
}
+
+/*
+ * Change per-fs features in /sys/fs/btrfs/UUID/features to match current
+ * values in superblock.  Call after any changes to incompat/compat_ro flags.
+ *
+ * @fs_info: filesystem whose feature attributes are refreshed; NULL is a no-op
+ * @bit:     feature bit that changed, must be set in supported_feature_masks[@set]
+ * @set:     feature set that @bit belongs to
+ *
+ * Nothing is done when the fsid kobject has not been initialized yet.  The
+ * whole attribute group is removed and created again; a failure to re-create
+ * it is reported in the log because the function has no way to return it.
+ */
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+		u64 bit, enum btrfs_feature_set set)
+{
+	struct btrfs_fs_devices *fs_devs;
+	struct kobject *fsid_kobj;
+	u64 features;
+	int ret;
+
+	if (!fs_info)
+		return;
+
+	/* NOTE(review): 'features' is assigned here but never read below. */
+	features = get_features(fs_info, set);
+	ASSERT(bit & supported_feature_masks[set]);
+
+	fs_devs = fs_info->fs_devices;
+	fsid_kobj = &fs_devs->fsid_kobj;
+
+	if (!fsid_kobj->state_initialized)
+		return;
+
+	/*
+	 * FIXME: this is too heavy to update just one value, ideally we'd like
+	 * to use sysfs_update_group but some refactoring is needed first.
+	 */
+	sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
+	ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
+	if (ret)
+		btrfs_err(fs_info,
+			  "failed to re-create sysfs feature group: %d", ret);
+}
+
static int btrfs_init_debugfs(void)
{
#ifdef CONFIG_DEBUG_FS
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index 9c0952212..d7da1a4c2 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -56,7 +56,7 @@ static struct btrfs_feature_attr btrfs_attr_##_name = { \
#define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
#define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
- BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature)
+ BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature)
#define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
@@ -90,4 +90,7 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
struct kobject *parent);
int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+ u64 bit, enum btrfs_feature_set set);
+
#endif /* _BTRFS_SYSFS_H_ */
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 9626252ee..0e1e61a7e 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -21,6 +21,9 @@
#include <linux/magic.h>
#include "btrfs-tests.h"
#include "../ctree.h"
+#include "../free-space-cache.h"
+#include "../free-space-tree.h"
+#include "../transaction.h"
#include "../volumes.h"
#include "../disk-io.h"
#include "../qgroup.h"
@@ -79,18 +82,18 @@ void btrfs_destroy_test_fs(void)
struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
{
struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
- GFP_NOFS);
+ GFP_KERNEL);
if (!fs_info)
return fs_info;
fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
- GFP_NOFS);
+ GFP_KERNEL);
if (!fs_info->fs_devices) {
kfree(fs_info);
return NULL;
}
fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block),
- GFP_NOFS);
+ GFP_KERNEL);
if (!fs_info->super_copy) {
kfree(fs_info->fs_devices);
kfree(fs_info);
@@ -122,6 +125,9 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
+ extent_io_tree_init(&fs_info->freed_extents[0], NULL);
+ extent_io_tree_init(&fs_info->freed_extents[1], NULL);
+ fs_info->pinned_extents = &fs_info->freed_extents[0];
return fs_info;
}
@@ -169,3 +175,55 @@ void btrfs_free_dummy_root(struct btrfs_root *root)
kfree(root);
}
+/*
+ * Allocate a zero-filled block group for the self-tests.
+ *
+ * @length: stored as the offset of the block group key
+ *
+ * The block group gets its own free space ctl and its own dummy fs_info.
+ * Returns the new block group, or NULL when one of the allocations fails (in
+ * which case everything allocated so far is freed again).
+ */
+struct btrfs_block_group_cache *
+btrfs_alloc_dummy_block_group(unsigned long length)
+{
+	struct btrfs_block_group_cache *cache;
+
+	cache = kzalloc(sizeof(*cache), GFP_KERNEL);
+	if (!cache)
+		return NULL;
+	cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
+					GFP_KERNEL);
+	if (!cache->free_space_ctl) {
+		kfree(cache);
+		return NULL;
+	}
+	cache->fs_info = btrfs_alloc_dummy_fs_info();
+	if (!cache->fs_info) {
+		kfree(cache->free_space_ctl);
+		kfree(cache);
+		return NULL;
+	}
+
+	cache->key.objectid = 0;
+	cache->key.offset = length;
+	cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+	cache->sectorsize = 4096;
+	cache->full_stripe_len = 4096;
+
+	/*
+	 * Zeroed memory is not a properly initialized spinlock when spinlock
+	 * debugging is enabled, so set the lock up explicitly.
+	 */
+	spin_lock_init(&cache->lock);
+	INIT_LIST_HEAD(&cache->list);
+	INIT_LIST_HEAD(&cache->cluster_list);
+	INIT_LIST_HEAD(&cache->bg_list);
+	btrfs_init_free_space_ctl(cache);
+	mutex_init(&cache->free_space_lock);
+
+	return cache;
+}
+
+/*
+ * Tear down a block group created by btrfs_alloc_dummy_block_group().
+ *
+ * Drops all cached free space entries, then frees the free space ctl and the
+ * block group itself.  A NULL @cache is accepted and ignored.
+ *
+ * NOTE(review): cache->fs_info, which the alloc helper sets up, is not
+ * released here - confirm who owns it.
+ */
+void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache)
+{
+	if (cache) {
+		__btrfs_remove_free_space_cache(cache->free_space_ctl);
+		kfree(cache->free_space_ctl);
+		kfree(cache);
+	}
+}
+
+/*
+ * Reset @trans to an all-zero transaction handle and mark it as a dummy one:
+ * type __TRANS_DUMMY, transid 1 and an empty qgroup ref list.
+ */
+void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans)
+{
+	/* Start from a clean slate so no stale field survives. */
+	memset(trans, 0, sizeof(*trans));
+
+	trans->type = __TRANS_DUMMY;
+	trans->transid = 1;
+	INIT_LIST_HEAD(&trans->qgroup_ref_list);
+}
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h
index fd3954224..054b8c73c 100644
--- a/fs/btrfs/tests/btrfs-tests.h
+++ b/fs/btrfs/tests/btrfs-tests.h
@@ -24,17 +24,23 @@
#define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt, ##__VA_ARGS__)
struct btrfs_root;
+struct btrfs_trans_handle;
int btrfs_test_free_space_cache(void);
int btrfs_test_extent_buffer_operations(void);
int btrfs_test_extent_io(void);
int btrfs_test_inodes(void);
int btrfs_test_qgroups(void);
+int btrfs_test_free_space_tree(void);
int btrfs_init_test_fs(void);
void btrfs_destroy_test_fs(void);
struct inode *btrfs_new_test_inode(void);
struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void);
void btrfs_free_dummy_root(struct btrfs_root *root);
+struct btrfs_block_group_cache *
+btrfs_alloc_dummy_block_group(unsigned long length);
+void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache);
+void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans);
#else
static inline int btrfs_test_free_space_cache(void)
{
@@ -63,6 +69,10 @@ static inline int btrfs_test_qgroups(void)
{
return 0;
}
+static inline int btrfs_test_free_space_tree(void)
+{
+ return 0;
+}
#endif
#endif
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 9e9f23681..669b58201 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -18,6 +18,8 @@
#include <linux/pagemap.h>
#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/sizes.h>
#include "btrfs-tests.h"
#include "../extent_io.h"
@@ -70,12 +72,14 @@ static int test_find_delalloc(void)
struct page *page;
struct page *locked_page = NULL;
unsigned long index = 0;
- u64 total_dirty = 256 * 1024 * 1024;
- u64 max_bytes = 128 * 1024 * 1024;
+ u64 total_dirty = SZ_256M;
+ u64 max_bytes = SZ_128M;
u64 start, end, test_start;
u64 found;
int ret = -EINVAL;
+ test_msg("Running find delalloc tests\n");
+
inode = btrfs_new_test_inode();
if (!inode) {
test_msg("Failed to allocate test inode\n");
@@ -90,7 +94,7 @@ static int test_find_delalloc(void)
* test.
*/
for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
- page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+ page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
if (!page) {
test_msg("Failed to allocate test page\n");
ret = -ENOMEM;
@@ -109,7 +113,7 @@ static int test_find_delalloc(void)
* |--- delalloc ---|
* |--- search ---|
*/
- set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS);
+ set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_KERNEL);
start = 0;
end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -133,14 +137,14 @@ static int test_find_delalloc(void)
* |--- delalloc ---|
* |--- search ---|
*/
- test_start = 64 * 1024 * 1024;
+ test_start = SZ_64M;
locked_page = find_lock_page(inode->i_mapping,
test_start >> PAGE_CACHE_SHIFT);
if (!locked_page) {
test_msg("Couldn't find the locked page\n");
goto out_bits;
}
- set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS);
+ set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_KERNEL);
start = test_start;
end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -195,7 +199,7 @@ static int test_find_delalloc(void)
*
* We are re-using our test_start from above since it works out well.
*/
- set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS);
+ set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_KERNEL);
start = test_start;
end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -220,8 +224,8 @@ static int test_find_delalloc(void)
* Now to test where we run into a page that is no longer dirty in the
* range we want to find.
*/
- page = find_get_page(inode->i_mapping, (max_bytes + (1 * 1024 * 1024))
- >> PAGE_CACHE_SHIFT);
+ page = find_get_page(inode->i_mapping,
+ (max_bytes + SZ_1M) >> PAGE_CACHE_SHIFT);
if (!page) {
test_msg("Couldn't find our page\n");
goto out_bits;
@@ -258,7 +262,7 @@ static int test_find_delalloc(void)
}
ret = 0;
out_bits:
- clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS);
+ clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL);
out:
if (locked_page)
page_cache_release(locked_page);
@@ -268,8 +272,139 @@ out:
return ret;
}
+/*
+ * Run the same sequence of bitmap operations on a plain in-memory bitmap and
+ * on an extent buffer and check after every step that both hold the same
+ * bytes.
+ *
+ * @bitmap: scratch buffer of @len bytes used as the reference bitmap
+ * @eb:     extent buffer under test
+ * @len:    number of bytes exercised in both @bitmap and @eb
+ *
+ * Returns 0 when every step matches, -EINVAL on the first mismatch.
+ *
+ * NOTE(review): the reference bitmap is an array of native unsigned longs
+ * that is compared byte-for-byte with the extent buffer; presumably this
+ * assumes a little-endian layout - confirm on big-endian machines.
+ */
+static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
+			     unsigned long len)
+{
+	/* Total number of bits covered by the test. */
+	const unsigned long nr_bits = len * BITS_PER_BYTE;
+	/* A long-sized run of bits ... */
+	const unsigned long long_bits = sizeof(long) * BITS_PER_BYTE;
+	/* ... starting half a long before the end of the first page. */
+	const unsigned long straddle = PAGE_CACHE_SIZE - sizeof(long) / 2;
+	unsigned long pos, seed;
+
+	/* Step 1: both sides zeroed. */
+	memset(bitmap, 0, len);
+	memset_extent_buffer(eb, 0, 0, len);
+	if (memcmp_extent_buffer(eb, bitmap, 0, len)) {
+		test_msg("Bitmap was not zeroed\n");
+		return -EINVAL;
+	}
+
+	/* Step 2: set every bit. */
+	bitmap_set(bitmap, 0, nr_bits);
+	extent_buffer_bitmap_set(eb, 0, 0, nr_bits);
+	if (memcmp_extent_buffer(eb, bitmap, 0, len)) {
+		test_msg("Setting all bits failed\n");
+		return -EINVAL;
+	}
+
+	/* Step 3: clear every bit again. */
+	bitmap_clear(bitmap, 0, nr_bits);
+	extent_buffer_bitmap_clear(eb, 0, 0, nr_bits);
+	if (memcmp_extent_buffer(eb, bitmap, 0, len)) {
+		test_msg("Clearing all bits failed\n");
+		return -EINVAL;
+	}
+
+	/* Step 4: set a run of bits that crosses a page boundary. */
+	bitmap_set(bitmap, straddle * BITS_PER_BYTE, long_bits);
+	extent_buffer_bitmap_set(eb, straddle, 0, long_bits);
+	if (memcmp_extent_buffer(eb, bitmap, 0, len)) {
+		test_msg("Setting straddling pages failed\n");
+		return -EINVAL;
+	}
+
+	/* Step 5: everything set except that same straddling run. */
+	bitmap_set(bitmap, 0, nr_bits);
+	bitmap_clear(bitmap, straddle * BITS_PER_BYTE, long_bits);
+	extent_buffer_bitmap_set(eb, 0, 0, nr_bits);
+	extent_buffer_bitmap_clear(eb, straddle, 0, long_bits);
+	if (memcmp_extent_buffer(eb, bitmap, 0, len)) {
+		test_msg("Clearing straddling pages failed\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Generate a wonky pseudo-random bit pattern for the sake of not using
+	 * something repetitive that could miss some hypothetical off-by-n bug.
+	 */
+	seed = 0;
+	for (pos = 0; pos < len / sizeof(long); pos++) {
+		seed = (0x19660dULL * (u64)seed + 0x3c6ef35fULL) & 0xffffffffUL;
+		bitmap[pos] = seed;
+	}
+	write_extent_buffer(eb, bitmap, 0, len);
+
+	/* Step 6: read every bit back, addressed in two different ways. */
+	for (pos = 0; pos < nr_bits; pos++) {
+		const int want = !!test_bit(pos, bitmap);
+
+		/* Byte offset 0, absolute bit number. */
+		if (!!extent_buffer_test_bit(eb, 0, pos) != want) {
+			test_msg("Testing bit pattern failed\n");
+			return -EINVAL;
+		}
+
+		/* Byte offset plus bit number within that byte. */
+		if (!!extent_buffer_test_bit(eb, pos / BITS_PER_BYTE,
+					     pos % BITS_PER_BYTE) != want) {
+			test_msg("Testing bit pattern with offset failed\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Run __test_eb_bitmaps() twice on a buffer of four pages: once with a dummy
+ * extent buffer allocated at start 0 and once with one allocated at start
+ * PAGE_CACHE_SIZE / 2, i.e. not page-aligned.
+ *
+ * Returns 0 on success, -ENOMEM when the bitmap or an extent buffer cannot be
+ * allocated, or the error returned by __test_eb_bitmaps().
+ */
+static int test_eb_bitmaps(void)
+{
+	unsigned long len = PAGE_CACHE_SIZE * 4;
+	unsigned long *bitmap;
+	struct extent_buffer *eb;
+	int ret;
+
+	test_msg("Running extent buffer bitmap tests\n");
+
+	bitmap = kmalloc(len, GFP_KERNEL);
+	if (!bitmap) {
+		test_msg("Couldn't allocate test bitmap\n");
+		return -ENOMEM;
+	}
+
+	eb = __alloc_dummy_extent_buffer(NULL, 0, len);
+	if (!eb) {
+		test_msg("Couldn't allocate test extent buffer\n");
+		ret = -ENOMEM;
+		goto free_bitmap;
+	}
+
+	ret = __test_eb_bitmaps(bitmap, eb, len);
+	if (ret)
+		goto free_eb;
+
+	/* Do it over again with an extent buffer which isn't page-aligned. */
+	free_extent_buffer(eb);
+	eb = __alloc_dummy_extent_buffer(NULL, PAGE_CACHE_SIZE / 2, len);
+	if (!eb) {
+		test_msg("Couldn't allocate test extent buffer\n");
+		ret = -ENOMEM;
+		goto free_bitmap;
+	}
+
+	ret = __test_eb_bitmaps(bitmap, eb, len);
+free_eb:
+	free_extent_buffer(eb);
+free_bitmap:
+	kfree(bitmap);
+	return ret;
+}
+
+/*
+ * Entry point of the extent I/O self-tests: runs the find-delalloc test and,
+ * only if that passes, the extent buffer bitmap test.  Returns the result of
+ * the last test that ran.
+ */
int btrfs_test_extent_io(void)
{
-	test_msg("Running find delalloc tests\n");
-	return test_find_delalloc();
+	int ret;
+
+	test_msg("Running extent I/O tests\n");
+
+	ret = test_find_delalloc();
+	if (!ret)
+		ret = test_eb_bitmaps();
+
+	test_msg("Extent I/O tests finished\n");
+	return ret;
}
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
index 8b72b005b..c9ad97b1e 100644
--- a/fs/btrfs/tests/free-space-tests.c
+++ b/fs/btrfs/tests/free-space-tests.c
@@ -23,41 +23,6 @@
#include "../free-space-cache.h"
#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
-static struct btrfs_block_group_cache *init_test_block_group(void)
-{
- struct btrfs_block_group_cache *cache;
-
- cache = kzalloc(sizeof(*cache), GFP_NOFS);
- if (!cache)
- return NULL;
- cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
- GFP_NOFS);
- if (!cache->free_space_ctl) {
- kfree(cache);
- return NULL;
- }
- cache->fs_info = btrfs_alloc_dummy_fs_info();
- if (!cache->fs_info) {
- kfree(cache->free_space_ctl);
- kfree(cache);
- return NULL;
- }
-
- cache->key.objectid = 0;
- cache->key.offset = 1024 * 1024 * 1024;
- cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
- cache->sectorsize = 4096;
- cache->full_stripe_len = 4096;
-
- spin_lock_init(&cache->lock);
- INIT_LIST_HEAD(&cache->list);
- INIT_LIST_HEAD(&cache->cluster_list);
- INIT_LIST_HEAD(&cache->bg_list);
-
- btrfs_init_free_space_ctl(cache);
-
- return cache;
-}
/*
* This test just does basic sanity checking, making sure we can add an extent
@@ -71,59 +36,59 @@ static int test_extents(struct btrfs_block_group_cache *cache)
test_msg("Running extent only tests\n");
/* First just make sure we can remove an entire entry */
- ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
+ ret = btrfs_add_free_space(cache, 0, SZ_4M);
if (ret) {
test_msg("Error adding initial extents %d\n", ret);
return ret;
}
- ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
+ ret = btrfs_remove_free_space(cache, 0, SZ_4M);
if (ret) {
test_msg("Error removing extent %d\n", ret);
return ret;
}
- if (test_check_exists(cache, 0, 4 * 1024 * 1024)) {
+ if (test_check_exists(cache, 0, SZ_4M)) {
test_msg("Full remove left some lingering space\n");
return -1;
}
/* Ok edge and middle cases now */
- ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
+ ret = btrfs_add_free_space(cache, 0, SZ_4M);
if (ret) {
test_msg("Error adding half extent %d\n", ret);
return ret;
}
- ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024);
+ ret = btrfs_remove_free_space(cache, 3 * SZ_1M, SZ_1M);
if (ret) {
test_msg("Error removing tail end %d\n", ret);
return ret;
}
- ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
+ ret = btrfs_remove_free_space(cache, 0, SZ_1M);
if (ret) {
test_msg("Error removing front end %d\n", ret);
return ret;
}
- ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096);
+ ret = btrfs_remove_free_space(cache, SZ_2M, 4096);
if (ret) {
test_msg("Error removing middle piece %d\n", ret);
return ret;
}
- if (test_check_exists(cache, 0, 1 * 1024 * 1024)) {
+ if (test_check_exists(cache, 0, SZ_1M)) {
test_msg("Still have space at the front\n");
return -1;
}
- if (test_check_exists(cache, 2 * 1024 * 1024, 4096)) {
+ if (test_check_exists(cache, SZ_2M, 4096)) {
test_msg("Still have space in the middle\n");
return -1;
}
- if (test_check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) {
+ if (test_check_exists(cache, 3 * SZ_1M, SZ_1M)) {
test_msg("Still have space at the end\n");
return -1;
}
@@ -141,30 +106,30 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache)
test_msg("Running bitmap only tests\n");
- ret = test_add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
+ ret = test_add_free_space_entry(cache, 0, SZ_4M, 1);
if (ret) {
test_msg("Couldn't create a bitmap entry %d\n", ret);
return ret;
}
- ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
+ ret = btrfs_remove_free_space(cache, 0, SZ_4M);
if (ret) {
test_msg("Error removing bitmap full range %d\n", ret);
return ret;
}
- if (test_check_exists(cache, 0, 4 * 1024 * 1024)) {
+ if (test_check_exists(cache, 0, SZ_4M)) {
test_msg("Left some space in bitmap\n");
return -1;
}
- ret = test_add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
+ ret = test_add_free_space_entry(cache, 0, SZ_4M, 1);
if (ret) {
test_msg("Couldn't add to our bitmap entry %d\n", ret);
return ret;
}
- ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024);
+ ret = btrfs_remove_free_space(cache, SZ_1M, SZ_2M);
if (ret) {
test_msg("Couldn't remove middle chunk %d\n", ret);
return ret;
@@ -177,23 +142,21 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache)
next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
/* Test a bit straddling two bitmaps */
- ret = test_add_free_space_entry(cache, next_bitmap_offset -
- (2 * 1024 * 1024), 4 * 1024 * 1024, 1);
+ ret = test_add_free_space_entry(cache, next_bitmap_offset - SZ_2M,
+ SZ_4M, 1);
if (ret) {
test_msg("Couldn't add space that straddles two bitmaps %d\n",
ret);
return ret;
}
- ret = btrfs_remove_free_space(cache, next_bitmap_offset -
- (1 * 1024 * 1024), 2 * 1024 * 1024);
+ ret = btrfs_remove_free_space(cache, next_bitmap_offset - SZ_1M, SZ_2M);
if (ret) {
test_msg("Couldn't remove overlapping space %d\n", ret);
return ret;
}
- if (test_check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024),
- 2 * 1024 * 1024)) {
+ if (test_check_exists(cache, next_bitmap_offset - SZ_1M, SZ_2M)) {
test_msg("Left some space when removing overlapping\n");
return -1;
}
@@ -216,43 +179,43 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
* bitmap, but the free space completely in the extent and then
* completely in the bitmap.
*/
- ret = test_add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1);
+ ret = test_add_free_space_entry(cache, SZ_4M, SZ_1M, 1);
if (ret) {
test_msg("Couldn't create bitmap entry %d\n", ret);
return ret;
}
- ret = test_add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
+ ret = test_add_free_space_entry(cache, 0, SZ_1M, 0);
if (ret) {
test_msg("Couldn't add extent entry %d\n", ret);
return ret;
}
- ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
+ ret = btrfs_remove_free_space(cache, 0, SZ_1M);
if (ret) {
test_msg("Couldn't remove extent entry %d\n", ret);
return ret;
}
- if (test_check_exists(cache, 0, 1 * 1024 * 1024)) {
+ if (test_check_exists(cache, 0, SZ_1M)) {
test_msg("Left remnants after our remove\n");
return -1;
}
/* Now to add back the extent entry and remove from the bitmap */
- ret = test_add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
+ ret = test_add_free_space_entry(cache, 0, SZ_1M, 0);
if (ret) {
test_msg("Couldn't re-add extent entry %d\n", ret);
return ret;
}
- ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024);
+ ret = btrfs_remove_free_space(cache, SZ_4M, SZ_1M);
if (ret) {
test_msg("Couldn't remove from bitmap %d\n", ret);
return ret;
}
- if (test_check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) {
+ if (test_check_exists(cache, SZ_4M, SZ_1M)) {
test_msg("Left remnants in the bitmap\n");
return -1;
}
@@ -261,19 +224,19 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
* Ok so a little more evil, extent entry and bitmap at the same offset,
* removing an overlapping chunk.
*/
- ret = test_add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1);
+ ret = test_add_free_space_entry(cache, SZ_1M, SZ_4M, 1);
if (ret) {
test_msg("Couldn't add to a bitmap %d\n", ret);
return ret;
}
- ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024);
+ ret = btrfs_remove_free_space(cache, SZ_512K, 3 * SZ_1M);
if (ret) {
test_msg("Couldn't remove overlapping space %d\n", ret);
return ret;
}
- if (test_check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) {
+ if (test_check_exists(cache, SZ_512K, 3 * SZ_1M)) {
test_msg("Left over pieces after removing overlapping\n");
return -1;
}
@@ -281,25 +244,25 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
__btrfs_remove_free_space_cache(cache->free_space_ctl);
/* Now with the extent entry offset into the bitmap */
- ret = test_add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1);
+ ret = test_add_free_space_entry(cache, SZ_4M, SZ_4M, 1);
if (ret) {
test_msg("Couldn't add space to the bitmap %d\n", ret);
return ret;
}
- ret = test_add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0);
+ ret = test_add_free_space_entry(cache, SZ_2M, SZ_2M, 0);
if (ret) {
test_msg("Couldn't add extent to the cache %d\n", ret);
return ret;
}
- ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024);
+ ret = btrfs_remove_free_space(cache, 3 * SZ_1M, SZ_4M);
if (ret) {
test_msg("Problem removing overlapping space %d\n", ret);
return ret;
}
- if (test_check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) {
+ if (test_check_exists(cache, 3 * SZ_1M, SZ_4M)) {
test_msg("Left something behind when removing space");
return -1;
}
@@ -315,29 +278,26 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
* [ del ]
*/
__btrfs_remove_free_space_cache(cache->free_space_ctl);
- ret = test_add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024,
- 4 * 1024 * 1024, 1);
+ ret = test_add_free_space_entry(cache, bitmap_offset + SZ_4M, SZ_4M, 1);
if (ret) {
test_msg("Couldn't add bitmap %d\n", ret);
return ret;
}
- ret = test_add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024,
- 5 * 1024 * 1024, 0);
+ ret = test_add_free_space_entry(cache, bitmap_offset - SZ_1M,
+ 5 * SZ_1M, 0);
if (ret) {
test_msg("Couldn't add extent entry %d\n", ret);
return ret;
}
- ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024,
- 5 * 1024 * 1024);
+ ret = btrfs_remove_free_space(cache, bitmap_offset + SZ_1M, 5 * SZ_1M);
if (ret) {
test_msg("Failed to free our space %d\n", ret);
return ret;
}
- if (test_check_exists(cache, bitmap_offset + 1 * 1024 * 1024,
- 5 * 1024 * 1024)) {
+ if (test_check_exists(cache, bitmap_offset + SZ_1M, 5 * SZ_1M)) {
test_msg("Left stuff over\n");
return -1;
}
@@ -350,19 +310,19 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
* to return -EAGAIN back from btrfs_remove_extent, make sure this
* doesn't happen.
*/
- ret = test_add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1);
+ ret = test_add_free_space_entry(cache, SZ_1M, SZ_2M, 1);
if (ret) {
test_msg("Couldn't add bitmap entry %d\n", ret);
return ret;
}
- ret = test_add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0);
+ ret = test_add_free_space_entry(cache, 3 * SZ_1M, SZ_1M, 0);
if (ret) {
test_msg("Couldn't add extent entry %d\n", ret);
return ret;
}
- ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024);
+ ret = btrfs_remove_free_space(cache, SZ_1M, 3 * SZ_1M);
if (ret) {
test_msg("Error removing bitmap and extent overlapping %d\n", ret);
return ret;
@@ -445,9 +405,11 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
int ret;
u64 offset;
u64 max_extent_size;
-
- bool (*use_bitmap_op)(struct btrfs_free_space_ctl *,
- struct btrfs_free_space *);
+ const struct btrfs_free_space_op test_free_space_ops = {
+ .recalc_thresholds = cache->free_space_ctl->op->recalc_thresholds,
+ .use_bitmap = test_use_bitmap,
+ };
+ const struct btrfs_free_space_op *orig_free_space_ops;
test_msg("Running space stealing from bitmap to extent\n");
@@ -469,22 +431,21 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
* that forces use of bitmaps as soon as we have at least 1
* extent entry.
*/
- use_bitmap_op = cache->free_space_ctl->op->use_bitmap;
- cache->free_space_ctl->op->use_bitmap = test_use_bitmap;
+ orig_free_space_ops = cache->free_space_ctl->op;
+ cache->free_space_ctl->op = &test_free_space_ops;
/*
* Extent entry covering free space range [128Mb - 256Kb, 128Mb - 128Kb[
*/
- ret = test_add_free_space_entry(cache, 128 * 1024 * 1024 - 256 * 1024,
- 128 * 1024, 0);
+ ret = test_add_free_space_entry(cache, SZ_128M - SZ_256K, SZ_128K, 0);
if (ret) {
test_msg("Couldn't add extent entry %d\n", ret);
return ret;
}
/* Bitmap entry covering free space range [128Mb + 512Kb, 256Mb[ */
- ret = test_add_free_space_entry(cache, 128 * 1024 * 1024 + 512 * 1024,
- 128 * 1024 * 1024 - 512 * 1024, 1);
+ ret = test_add_free_space_entry(cache, SZ_128M + SZ_512K,
+ SZ_128M - SZ_512K, 1);
if (ret) {
test_msg("Couldn't add bitmap entry %d\n", ret);
return ret;
@@ -502,21 +463,19 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
* [128Mb + 512Kb, 128Mb + 768Kb[
*/
ret = btrfs_remove_free_space(cache,
- 128 * 1024 * 1024 + 768 * 1024,
- 128 * 1024 * 1024 - 768 * 1024);
+ SZ_128M + 768 * SZ_1K,
+ SZ_128M - 768 * SZ_1K);
if (ret) {
test_msg("Failed to free part of bitmap space %d\n", ret);
return ret;
}
/* Confirm that only those 2 ranges are marked as free. */
- if (!test_check_exists(cache, 128 * 1024 * 1024 - 256 * 1024,
- 128 * 1024)) {
+ if (!test_check_exists(cache, SZ_128M - SZ_256K, SZ_128K)) {
test_msg("Free space range missing\n");
return -ENOENT;
}
- if (!test_check_exists(cache, 128 * 1024 * 1024 + 512 * 1024,
- 256 * 1024)) {
+ if (!test_check_exists(cache, SZ_128M + SZ_512K, SZ_256K)) {
test_msg("Free space range missing\n");
return -ENOENT;
}
@@ -525,8 +484,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
* Confirm that the bitmap range [128Mb + 768Kb, 256Mb[ isn't marked
* as free anymore.
*/
- if (test_check_exists(cache, 128 * 1024 * 1024 + 768 * 1024,
- 128 * 1024 * 1024 - 768 * 1024)) {
+ if (test_check_exists(cache, SZ_128M + 768 * SZ_1K,
+ SZ_128M - 768 * SZ_1K)) {
test_msg("Bitmap region not removed from space cache\n");
return -EINVAL;
}
@@ -535,8 +494,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
* Confirm that the region [128Mb + 256Kb, 128Mb + 512Kb[, which is
* covered by the bitmap, isn't marked as free.
*/
- if (test_check_exists(cache, 128 * 1024 * 1024 + 256 * 1024,
- 256 * 1024)) {
+ if (test_check_exists(cache, SZ_128M + SZ_256K, SZ_256K)) {
test_msg("Invalid bitmap region marked as free\n");
return -EINVAL;
}
@@ -545,8 +503,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
* Confirm that the region [128Mb, 128Mb + 256Kb[, which is covered
* by the bitmap too, isn't marked as free either.
*/
- if (test_check_exists(cache, 128 * 1024 * 1024,
- 256 * 1024)) {
+ if (test_check_exists(cache, SZ_128M, SZ_256K)) {
test_msg("Invalid bitmap region marked as free\n");
return -EINVAL;
}
@@ -556,13 +513,13 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
* lets make sure the free space cache marks it as free in the bitmap,
* and doesn't insert a new extent entry to represent this region.
*/
- ret = btrfs_add_free_space(cache, 128 * 1024 * 1024, 512 * 1024);
+ ret = btrfs_add_free_space(cache, SZ_128M, SZ_512K);
if (ret) {
test_msg("Error adding free space: %d\n", ret);
return ret;
}
/* Confirm the region is marked as free. */
- if (!test_check_exists(cache, 128 * 1024 * 1024, 512 * 1024)) {
+ if (!test_check_exists(cache, SZ_128M, SZ_512K)) {
test_msg("Bitmap region not marked as free\n");
return -ENOENT;
}
@@ -581,8 +538,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
* The goal is to test that the bitmap entry space stealing doesn't
* steal this space region.
*/
- ret = btrfs_add_free_space(cache, 128 * 1024 * 1024 + 16 * 1024 * 1024,
- 4096);
+ ret = btrfs_add_free_space(cache, SZ_128M + SZ_16M, 4096);
if (ret) {
test_msg("Error adding free space: %d\n", ret);
return ret;
@@ -601,15 +557,13 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
* expand the range covered by the existing extent entry that represents
* the free space [128Mb - 256Kb, 128Mb - 128Kb[.
*/
- ret = btrfs_add_free_space(cache, 128 * 1024 * 1024 - 128 * 1024,
- 128 * 1024);
+ ret = btrfs_add_free_space(cache, SZ_128M - SZ_128K, SZ_128K);
if (ret) {
test_msg("Error adding free space: %d\n", ret);
return ret;
}
/* Confirm the region is marked as free. */
- if (!test_check_exists(cache, 128 * 1024 * 1024 - 128 * 1024,
- 128 * 1024)) {
+ if (!test_check_exists(cache, SZ_128M - SZ_128K, SZ_128K)) {
test_msg("Extent region not marked as free\n");
return -ENOENT;
}
@@ -637,21 +591,20 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
* that represents the 1Mb free space, and therefore we're able to
* allocate the whole free space at once.
*/
- if (!test_check_exists(cache, 128 * 1024 * 1024 - 256 * 1024,
- 1 * 1024 * 1024)) {
+ if (!test_check_exists(cache, SZ_128M - SZ_256K, SZ_1M)) {
test_msg("Expected region not marked as free\n");
return -ENOENT;
}
- if (cache->free_space_ctl->free_space != (1 * 1024 * 1024 + 4096)) {
+ if (cache->free_space_ctl->free_space != (SZ_1M + 4096)) {
test_msg("Cache free space is not 1Mb + 4Kb\n");
return -EINVAL;
}
offset = btrfs_find_space_for_alloc(cache,
- 0, 1 * 1024 * 1024, 0,
+ 0, SZ_1M, 0,
&max_extent_size);
- if (offset != (128 * 1024 * 1024 - 256 * 1024)) {
+ if (offset != (SZ_128M - SZ_256K)) {
test_msg("Failed to allocate 1Mb from space cache, returned offset is: %llu\n",
offset);
return -EINVAL;
@@ -670,7 +623,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
offset = btrfs_find_space_for_alloc(cache,
0, 4096, 0,
&max_extent_size);
- if (offset != (128 * 1024 * 1024 + 16 * 1024 * 1024)) {
+ if (offset != (SZ_128M + SZ_16M)) {
test_msg("Failed to allocate 4Kb from space cache, returned offset is: %llu\n",
offset);
return -EINVAL;
@@ -691,16 +644,14 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
/*
* Extent entry covering free space range [128Mb + 128Kb, 128Mb + 256Kb[
*/
- ret = test_add_free_space_entry(cache, 128 * 1024 * 1024 + 128 * 1024,
- 128 * 1024, 0);
+ ret = test_add_free_space_entry(cache, SZ_128M + SZ_128K, SZ_128K, 0);
if (ret) {
test_msg("Couldn't add extent entry %d\n", ret);
return ret;
}
/* Bitmap entry covering free space range [0, 128Mb - 512Kb[ */
- ret = test_add_free_space_entry(cache, 0,
- 128 * 1024 * 1024 - 512 * 1024, 1);
+ ret = test_add_free_space_entry(cache, 0, SZ_128M - SZ_512K, 1);
if (ret) {
test_msg("Couldn't add bitmap entry %d\n", ret);
return ret;
@@ -717,22 +668,18 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
* [128Mb + 128Kb, 128Mb + 256Kb[
* [128Mb - 768Kb, 128Mb - 512Kb[
*/
- ret = btrfs_remove_free_space(cache,
- 0,
- 128 * 1024 * 1024 - 768 * 1024);
+ ret = btrfs_remove_free_space(cache, 0, SZ_128M - 768 * SZ_1K);
if (ret) {
test_msg("Failed to free part of bitmap space %d\n", ret);
return ret;
}
/* Confirm that only those 2 ranges are marked as free. */
- if (!test_check_exists(cache, 128 * 1024 * 1024 + 128 * 1024,
- 128 * 1024)) {
+ if (!test_check_exists(cache, SZ_128M + SZ_128K, SZ_128K)) {
test_msg("Free space range missing\n");
return -ENOENT;
}
- if (!test_check_exists(cache, 128 * 1024 * 1024 - 768 * 1024,
- 256 * 1024)) {
+ if (!test_check_exists(cache, SZ_128M - 768 * SZ_1K, SZ_256K)) {
test_msg("Free space range missing\n");
return -ENOENT;
}
@@ -741,8 +688,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
* Confirm that the bitmap range [0, 128Mb - 768Kb[ isn't marked
* as free anymore.
*/
- if (test_check_exists(cache, 0,
- 128 * 1024 * 1024 - 768 * 1024)) {
+ if (test_check_exists(cache, 0, SZ_128M - 768 * SZ_1K)) {
test_msg("Bitmap region not removed from space cache\n");
return -EINVAL;
}
@@ -751,8 +697,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
* Confirm that the region [128Mb - 512Kb, 128Mb[, which is
* covered by the bitmap, isn't marked as free.
*/
- if (test_check_exists(cache, 128 * 1024 * 1024 - 512 * 1024,
- 512 * 1024)) {
+ if (test_check_exists(cache, SZ_128M - SZ_512K, SZ_512K)) {
test_msg("Invalid bitmap region marked as free\n");
return -EINVAL;
}
@@ -762,15 +707,13 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
* lets make sure the free space cache marks it as free in the bitmap,
* and doesn't insert a new extent entry to represent this region.
*/
- ret = btrfs_add_free_space(cache, 128 * 1024 * 1024 - 512 * 1024,
- 512 * 1024);
+ ret = btrfs_add_free_space(cache, SZ_128M - SZ_512K, SZ_512K);
if (ret) {
test_msg("Error adding free space: %d\n", ret);
return ret;
}
/* Confirm the region is marked as free. */
- if (!test_check_exists(cache, 128 * 1024 * 1024 - 512 * 1024,
- 512 * 1024)) {
+ if (!test_check_exists(cache, SZ_128M - SZ_512K, SZ_512K)) {
test_msg("Bitmap region not marked as free\n");
return -ENOENT;
}
@@ -789,7 +732,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
* The goal is to test that the bitmap entry space stealing doesn't
* steal this space region.
*/
- ret = btrfs_add_free_space(cache, 32 * 1024 * 1024, 8192);
+ ret = btrfs_add_free_space(cache, SZ_32M, 8192);
if (ret) {
test_msg("Error adding free space: %d\n", ret);
return ret;
@@ -800,13 +743,13 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
* expand the range covered by the existing extent entry that represents
* the free space [128Mb + 128Kb, 128Mb + 256Kb[.
*/
- ret = btrfs_add_free_space(cache, 128 * 1024 * 1024, 128 * 1024);
+ ret = btrfs_add_free_space(cache, SZ_128M, SZ_128K);
if (ret) {
test_msg("Error adding free space: %d\n", ret);
return ret;
}
/* Confirm the region is marked as free. */
- if (!test_check_exists(cache, 128 * 1024 * 1024, 128 * 1024)) {
+ if (!test_check_exists(cache, SZ_128M, SZ_128K)) {
test_msg("Extent region not marked as free\n");
return -ENOENT;
}
@@ -834,21 +777,19 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
* that represents the 1Mb free space, and therefore we're able to
* allocate the whole free space at once.
*/
- if (!test_check_exists(cache, 128 * 1024 * 1024 - 768 * 1024,
- 1 * 1024 * 1024)) {
+ if (!test_check_exists(cache, SZ_128M - 768 * SZ_1K, SZ_1M)) {
test_msg("Expected region not marked as free\n");
return -ENOENT;
}
- if (cache->free_space_ctl->free_space != (1 * 1024 * 1024 + 8192)) {
+ if (cache->free_space_ctl->free_space != (SZ_1M + 8192)) {
test_msg("Cache free space is not 1Mb + 8Kb\n");
return -EINVAL;
}
- offset = btrfs_find_space_for_alloc(cache,
- 0, 1 * 1024 * 1024, 0,
+ offset = btrfs_find_space_for_alloc(cache, 0, SZ_1M, 0,
&max_extent_size);
- if (offset != (128 * 1024 * 1024 - 768 * 1024)) {
+ if (offset != (SZ_128M - 768 * SZ_1K)) {
test_msg("Failed to allocate 1Mb from space cache, returned offset is: %llu\n",
offset);
return -EINVAL;
@@ -867,7 +808,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
offset = btrfs_find_space_for_alloc(cache,
0, 8192, 0,
&max_extent_size);
- if (offset != (32 * 1024 * 1024)) {
+ if (offset != SZ_32M) {
test_msg("Failed to allocate 8Kb from space cache, returned offset is: %llu\n",
offset);
return -EINVAL;
@@ -877,7 +818,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
if (ret)
return ret;
- cache->free_space_ctl->op->use_bitmap = use_bitmap_op;
+ cache->free_space_ctl->op = orig_free_space_ops;
__btrfs_remove_free_space_cache(cache->free_space_ctl);
return 0;
@@ -891,7 +832,7 @@ int btrfs_test_free_space_cache(void)
test_msg("Running btrfs free space cache tests\n");
- cache = init_test_block_group();
+ cache = btrfs_alloc_dummy_block_group(1024 * 1024 * 1024);
if (!cache) {
test_msg("Couldn't run the tests\n");
return 0;
@@ -922,9 +863,7 @@ int btrfs_test_free_space_cache(void)
ret = test_steal_space_from_bitmap_to_extent(cache);
out:
- __btrfs_remove_free_space_cache(cache->free_space_ctl);
- kfree(cache->free_space_ctl);
- kfree(cache);
+ btrfs_free_dummy_block_group(cache);
btrfs_free_dummy_root(root);
test_msg("Free space cache tests finished\n");
return ret;
diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c
new file mode 100644
index 000000000..d05fe1ab4
--- /dev/null
+++ b/fs/btrfs/tests/free-space-tree-tests.c
@@ -0,0 +1,571 @@
+/*
+ * Copyright (C) 2015 Facebook. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "btrfs-tests.h"
+#include "../ctree.h"
+#include "../disk-io.h"
+#include "../free-space-tree.h"
+#include "../transaction.h"
+
+/*
+ * An expected free range. The checker compares start/length against each
+ * extent item's key (objectid/offset), or against the runs of set bits
+ * found in the bitmap items.
+ */
+struct free_space_extent {
+ u64 start, length;
+};
+
+/*
+ * The test cases align their operations to this in order to hit some of the
+ * edge cases in the bitmap code.
+ */
+#define BITMAP_RANGE (BTRFS_FREE_SPACE_BITMAP_BITS * 4096)
+
+/*
+ * Check that the free space tree items of @cache describe exactly the
+ * @num_extents ranges listed in @extents, in order.
+ *
+ * The free space info item is looked up first and its extent count must
+ * equal @num_extents. Depending on BTRFS_FREE_SPACE_USING_BITMAPS in its
+ * flags, the items following it in path->nodes[0] are then either walked
+ * bit by bit (bitmap items) or compared key by key (extent items).
+ *
+ * Returns 0 on a match, -EINVAL on any mismatch, or the error returned by
+ * search_free_space_info(). @path is released before returning.
+ */
+static int __check_free_space_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *cache,
+ struct btrfs_path *path,
+ struct free_space_extent *extents,
+ unsigned int num_extents)
+{
+ struct btrfs_free_space_info *info;
+ struct btrfs_key key;
+ int prev_bit = 0, bit;
+ u64 extent_start = 0, offset, end;
+ u32 flags, extent_count;
+ unsigned int i;
+ int ret;
+
+ info = search_free_space_info(trans, fs_info, cache, path, 0);
+ if (IS_ERR(info)) {
+ test_msg("Could not find free space info\n");
+ ret = PTR_ERR(info);
+ goto out;
+ }
+ flags = btrfs_free_space_flags(path->nodes[0], info);
+ extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
+
+ if (extent_count != num_extents) {
+ test_msg("Extent count is wrong\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
+ /* The info item is expected in the first slot of the leaf. */
+ if (path->slots[0] != 0)
+ goto invalid;
+ end = cache->key.objectid + cache->key.offset;
+ i = 0;
+ while (++path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if (key.type != BTRFS_FREE_SPACE_BITMAP_KEY)
+ goto invalid;
+ offset = key.objectid;
+ while (offset < key.objectid + key.offset) {
+ bit = free_space_test_bit(cache, path, offset);
+ if (prev_bit == 0 && bit == 1) {
+ /* A run of set bits starts here. */
+ extent_start = offset;
+ } else if (prev_bit == 1 && bit == 0) {
+ /*
+ * A run just ended: it must be the next
+ * expected extent. The bounds check comes
+ * first so extents[i] is never read out of
+ * range.
+ */
+ if (i >= num_extents ||
+ extent_start != extents[i].start ||
+ offset - extent_start != extents[i].length)
+ goto invalid;
+ i++;
+ }
+ prev_bit = bit;
+ offset += cache->sectorsize;
+ }
+ }
+ /*
+ * A run still open after the last bitmap item is treated as ending
+ * at key.objectid + key.offset of the block group.
+ */
+ if (prev_bit == 1) {
+ if (i >= num_extents ||
+ extent_start != extents[i].start ||
+ end - extent_start != extents[i].length)
+ goto invalid;
+ i++;
+ }
+ if (i != num_extents)
+ goto invalid;
+ } else {
+ /* Exactly one info item followed by one item per extent. */
+ if (btrfs_header_nritems(path->nodes[0]) != num_extents + 1 ||
+ path->slots[0] != 0)
+ goto invalid;
+
+ for (i = 0; i < num_extents; i++) {
+ path->slots[0]++;
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY ||
+ key.objectid != extents[i].start ||
+ key.offset != extents[i].length)
+ goto invalid;
+ }
+ }
+
+ ret = 0;
+out:
+ btrfs_release_path(path);
+ return ret;
+invalid:
+ test_msg("Free space tree is invalid\n");
+ ret = -EINVAL;
+ goto out;
+}
+
+/*
+ * Check the free space tree of @cache against @extents in both formats:
+ * first in whatever format it currently uses, then again after converting
+ * it to the other one (bitmaps <-> extents).
+ *
+ * Returns 0 if both checks pass, otherwise the first error encountered.
+ * The block group is not converted back afterwards.
+ */
+static int check_free_space_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *cache,
+ struct btrfs_path *path,
+ struct free_space_extent *extents,
+ unsigned int num_extents)
+{
+ struct btrfs_free_space_info *info;
+ u32 flags;
+ int ret;
+
+ /*
+ * Only the flags are needed here, to know which way to convert later;
+ * __check_free_space_extents() does its own lookup.
+ */
+ info = search_free_space_info(trans, fs_info, cache, path, 0);
+ if (IS_ERR(info)) {
+ test_msg("Could not find free space info\n");
+ btrfs_release_path(path);
+ return PTR_ERR(info);
+ }
+ flags = btrfs_free_space_flags(path->nodes[0], info);
+ btrfs_release_path(path);
+
+ ret = __check_free_space_extents(trans, fs_info, cache, path, extents,
+ num_extents);
+ if (ret)
+ return ret;
+
+ /* Flip it to the other format and check that for good measure. */
+ if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
+ ret = convert_free_space_to_extents(trans, fs_info, cache, path);
+ if (ret) {
+ test_msg("Could not convert to extents\n");
+ return ret;
+ }
+ } else {
+ ret = convert_free_space_to_bitmaps(trans, fs_info, cache, path);
+ if (ret) {
+ test_msg("Could not convert to bitmaps\n");
+ return ret;
+ }
+ }
+ return __check_free_space_extents(trans, fs_info, cache, path, extents,
+ num_extents);
+}
+
+/*
+ * Without modifying anything, expect a single free extent that starts at
+ * cache->key.objectid and is cache->key.offset long.
+ */
+static int test_empty_block_group(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *cache,
+ struct btrfs_path *path)
+{
+ struct free_space_extent extents[] = {
+ {cache->key.objectid, cache->key.offset},
+ };
+
+ return check_free_space_extents(trans, fs_info, cache, path,
+ extents, ARRAY_SIZE(extents));
+}
+
+/*
+ * Remove the whole range of the block group from the free space tree and
+ * expect no free extents to be left.
+ */
+static int test_remove_all(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *cache,
+ struct btrfs_path *path)
+{
+ /* Deliberately empty: ARRAY_SIZE() below evaluates to 0. */
+ struct free_space_extent extents[] = {};
+ int ret;
+
+ ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ cache->key.objectid,
+ cache->key.offset);
+ if (ret) {
+ test_msg("Could not remove free space\n");
+ return ret;
+ }
+
+ return check_free_space_extents(trans, fs_info, cache, path,
+ extents, ARRAY_SIZE(extents));
+}
+
+/*
+ * Remove the first BITMAP_RANGE of the block group and expect one free
+ * extent covering everything after it.
+ */
+static int test_remove_beginning(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *cache,
+ struct btrfs_path *path)
+{
+ struct free_space_extent extents[] = {
+ {cache->key.objectid + BITMAP_RANGE,
+ cache->key.offset - BITMAP_RANGE},
+ };
+ int ret;
+
+ ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ cache->key.objectid, BITMAP_RANGE);
+ if (ret) {
+ test_msg("Could not remove free space\n");
+ return ret;
+ }
+
+ return check_free_space_extents(trans, fs_info, cache, path,
+ extents, ARRAY_SIZE(extents));
+
+}
+
+/*
+ * Remove the last BITMAP_RANGE of the block group and expect one free
+ * extent covering everything before it.
+ */
+static int test_remove_end(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *cache,
+ struct btrfs_path *path)
+{
+ struct free_space_extent extents[] = {
+ {cache->key.objectid, cache->key.offset - BITMAP_RANGE},
+ };
+ int ret;
+
+ ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ cache->key.objectid +
+ cache->key.offset - BITMAP_RANGE,
+ BITMAP_RANGE);
+ if (ret) {
+ test_msg("Could not remove free space\n");
+ return ret;
+ }
+
+ return check_free_space_extents(trans, fs_info, cache, path,
+ extents, ARRAY_SIZE(extents));
+}
+
+/*
+ * Remove the second BITMAP_RANGE of the block group and expect the free
+ * space to be split into two extents: the first BITMAP_RANGE, and
+ * everything from 2 * BITMAP_RANGE onwards.
+ */
+static int test_remove_middle(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *cache,
+ struct btrfs_path *path)
+{
+ struct free_space_extent extents[] = {
+ {cache->key.objectid, BITMAP_RANGE},
+ {cache->key.objectid + 2 * BITMAP_RANGE,
+ cache->key.offset - 2 * BITMAP_RANGE},
+ };
+ int ret;
+
+ ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ cache->key.objectid + BITMAP_RANGE,
+ BITMAP_RANGE);
+ if (ret) {
+ test_msg("Could not remove free space\n");
+ return ret;
+ }
+
+ return check_free_space_extents(trans, fs_info, cache, path,
+ extents, ARRAY_SIZE(extents));
+}
+
+/*
+ * Clear all free space, then add [0, BITMAP_RANGE) followed by the range
+ * directly after it (offsets relative to cache->key.objectid). The second
+ * addition is expected to merge with its left neighbour, leaving a single
+ * extent of 2 * BITMAP_RANGE.
+ */
+static int test_merge_left(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *cache,
+ struct btrfs_path *path)
+{
+ struct free_space_extent extents[] = {
+ {cache->key.objectid, 2 * BITMAP_RANGE},
+ };
+ int ret;
+
+ ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ cache->key.objectid,
+ cache->key.offset);
+ if (ret) {
+ test_msg("Could not remove free space\n");
+ return ret;
+ }
+
+ ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ cache->key.objectid, BITMAP_RANGE);
+ if (ret) {
+ test_msg("Could not add free space\n");
+ return ret;
+ }
+
+ ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ cache->key.objectid + BITMAP_RANGE,
+ BITMAP_RANGE);
+ if (ret) {
+ test_msg("Could not add free space\n");
+ return ret;
+ }
+
+ return check_free_space_extents(trans, fs_info, cache, path,
+ extents, ARRAY_SIZE(extents));
+}
+
+/*
+ * Clear all free space, then add the third BITMAP_RANGE followed by the
+ * second one. The second addition is expected to merge with its right
+ * neighbour, leaving a single extent of 2 * BITMAP_RANGE that starts at
+ * cache->key.objectid + BITMAP_RANGE.
+ */
+static int test_merge_right(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *cache,
+ struct btrfs_path *path)
+{
+ struct free_space_extent extents[] = {
+ {cache->key.objectid + BITMAP_RANGE, 2 * BITMAP_RANGE},
+ };
+ int ret;
+
+ ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ cache->key.objectid,
+ cache->key.offset);
+ if (ret) {
+ test_msg("Could not remove free space\n");
+ return ret;
+ }
+
+ ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ cache->key.objectid + 2 * BITMAP_RANGE,
+ BITMAP_RANGE);
+ if (ret) {
+ test_msg("Could not add free space\n");
+ return ret;
+ }
+
+ ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ cache->key.objectid + BITMAP_RANGE,
+ BITMAP_RANGE);
+ if (ret) {
+ test_msg("Could not add free space\n");
+ return ret;
+ }
+
+ return check_free_space_extents(trans, fs_info, cache, path,
+ extents, ARRAY_SIZE(extents));
+}
+
+/*
+ * Clear all free space, then add the first and third BITMAP_RANGE and
+ * finally the second one between them. The last addition is expected to
+ * merge with both neighbours, leaving a single extent of 3 * BITMAP_RANGE.
+ */
+static int test_merge_both(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *cache,
+ struct btrfs_path *path)
+{
+ struct free_space_extent extents[] = {
+ {cache->key.objectid, 3 * BITMAP_RANGE},
+ };
+ int ret;
+
+ ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ cache->key.objectid,
+ cache->key.offset);
+ if (ret) {
+ test_msg("Could not remove free space\n");
+ return ret;
+ }
+
+ ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ cache->key.objectid, BITMAP_RANGE);
+ if (ret) {
+ test_msg("Could not add free space\n");
+ return ret;
+ }
+
+ ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ cache->key.objectid + 2 * BITMAP_RANGE,
+ BITMAP_RANGE);
+ if (ret) {
+ test_msg("Could not add free space\n");
+ return ret;
+ }
+
+ ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ cache->key.objectid + BITMAP_RANGE,
+ BITMAP_RANGE);
+ if (ret) {
+ test_msg("Could not add free space\n");
+ return ret;
+ }
+
+ return check_free_space_extents(trans, fs_info, cache, path,
+ extents, ARRAY_SIZE(extents));
+}
+
+/*
+ * Clear all free space, then add the first, fifth and third BITMAP_RANGE
+ * (in that order). None of them touch each other, so three separate
+ * extents of BITMAP_RANGE each are expected.
+ */
+static int test_merge_none(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *cache,
+ struct btrfs_path *path)
+{
+ struct free_space_extent extents[] = {
+ {cache->key.objectid, BITMAP_RANGE},
+ {cache->key.objectid + 2 * BITMAP_RANGE, BITMAP_RANGE},
+ {cache->key.objectid + 4 * BITMAP_RANGE, BITMAP_RANGE},
+ };
+ int ret;
+
+ ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ cache->key.objectid,
+ cache->key.offset);
+ if (ret) {
+ test_msg("Could not remove free space\n");
+ return ret;
+ }
+
+ ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ cache->key.objectid, BITMAP_RANGE);
+ if (ret) {
+ test_msg("Could not add free space\n");
+ return ret;
+ }
+
+ ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ cache->key.objectid + 4 * BITMAP_RANGE,
+ BITMAP_RANGE);
+ if (ret) {
+ test_msg("Could not add free space\n");
+ return ret;
+ }
+
+ ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ cache->key.objectid + 2 * BITMAP_RANGE,
+ BITMAP_RANGE);
+ if (ret) {
+ test_msg("Could not add free space\n");
+ return ret;
+ }
+
+ return check_free_space_extents(trans, fs_info, cache, path,
+ extents, ARRAY_SIZE(extents));
+}
+
+/*
+ * Signature shared by all test cases in this file: they receive the
+ * transaction handle, fs_info, block group and path prepared by run_test()
+ * and return 0 on success or a non-zero error.
+ */
+typedef int (*test_func_t)(struct btrfs_trans_handle *,
+ struct btrfs_fs_info *,
+ struct btrfs_block_group_cache *,
+ struct btrfs_path *);
+
+/*
+ * Run one test case against a freshly built dummy environment.
+ *
+ * Sets up a dummy root (used as both free space root and tree root), a
+ * dummy fs_info, an empty leaf and a dummy block group of 8 * BITMAP_RANGE,
+ * then adds the block group's free space. If @bitmaps is non-zero the free
+ * space is converted to bitmaps before @test_func runs. Afterwards the
+ * block group's free space is removed again and the root node must be left
+ * without items.
+ *
+ * Returns 0 on success or a negative errno. Everything allocated here is
+ * released on every exit path.
+ */
+static int run_test(test_func_t test_func, int bitmaps)
+{
+ struct btrfs_root *root = NULL;
+ struct btrfs_block_group_cache *cache = NULL;
+ struct btrfs_trans_handle trans;
+ struct btrfs_path *path = NULL;
+ int ret;
+
+ root = btrfs_alloc_dummy_root();
+ if (IS_ERR(root)) {
+ test_msg("Couldn't allocate dummy root\n");
+ /*
+ * Nothing has been allocated yet, so return directly instead of
+ * handing an error pointer to the cleanup code at "out".
+ */
+ return PTR_ERR(root);
+ }
+
+ root->fs_info = btrfs_alloc_dummy_fs_info();
+ if (!root->fs_info) {
+ test_msg("Couldn't allocate dummy fs info\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ btrfs_set_super_compat_ro_flags(root->fs_info->super_copy,
+ BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE);
+ root->fs_info->free_space_root = root;
+ root->fs_info->tree_root = root;
+
+ root->node = alloc_test_extent_buffer(root->fs_info, 4096);
+ if (!root->node) {
+ test_msg("Couldn't allocate dummy buffer\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+ /* Start from an empty leaf. */
+ btrfs_set_header_level(root->node, 0);
+ btrfs_set_header_nritems(root->node, 0);
+ root->alloc_bytenr += 8192;
+
+ cache = btrfs_alloc_dummy_block_group(8 * BITMAP_RANGE);
+ if (!cache) {
+ test_msg("Couldn't allocate dummy block group cache\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+ /*
+ * NOTE(review): these thresholds presumably keep the free space tree
+ * code from switching formats on its own, so that only the explicit
+ * conversions in the tests happen - confirm against free-space-tree.c.
+ */
+ cache->bitmap_low_thresh = 0;
+ cache->bitmap_high_thresh = (u32)-1;
+ cache->needs_free_space = 1;
+
+ btrfs_init_dummy_trans(&trans);
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ test_msg("Couldn't allocate path\n");
+ /* Go through "out" so that root and cache are not leaked. */
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = add_block_group_free_space(&trans, root->fs_info, cache);
+ if (ret) {
+ test_msg("Could not add block group free space\n");
+ goto out;
+ }
+
+ if (bitmaps) {
+ ret = convert_free_space_to_bitmaps(&trans, root->fs_info,
+ cache, path);
+ if (ret) {
+ test_msg("Could not convert block group to bitmaps\n");
+ goto out;
+ }
+ }
+
+ ret = test_func(&trans, root->fs_info, cache, path);
+ if (ret)
+ goto out;
+
+ ret = remove_block_group_free_space(&trans, root->fs_info, cache);
+ if (ret) {
+ test_msg("Could not remove block group free space\n");
+ goto out;
+ }
+
+ /* Removing the block group must leave the tree empty. */
+ if (btrfs_header_nritems(root->node) != 0) {
+ test_msg("Free space tree has leftover items\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ /* path and cache may still be NULL here, depending on where we failed. */
+ btrfs_free_path(path);
+ btrfs_free_dummy_block_group(cache);
+ btrfs_free_dummy_root(root);
+ return ret;
+}
+
+/*
+ * Run @test_func twice via run_test(): first with bitmaps == 0, then with
+ * bitmaps == 1. Returns the first non-zero result, or 0 if both runs pass.
+ */
+static int run_test_both_formats(test_func_t test_func)
+{
+ int ret;
+
+ ret = run_test(test_func, 0);
+ if (ret)
+ return ret;
+ return run_test(test_func, 1);
+}
+
+/*
+ * Entry point of the free space tree tests: runs every test case listed
+ * below in both formats and stops at the first failure, logging the failing
+ * test function. Returns 0 if all tests pass, otherwise the failing test's
+ * error.
+ */
+int btrfs_test_free_space_tree(void)
+{
+ test_func_t tests[] = {
+ test_empty_block_group,
+ test_remove_all,
+ test_remove_beginning,
+ test_remove_end,
+ test_remove_middle,
+ test_merge_left,
+ test_merge_right,
+ test_merge_both,
+ test_merge_none,
+ };
+ int i;
+
+ test_msg("Running free space tree tests\n");
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ int ret = run_test_both_formats(tests[i]);
+ if (ret) {
+ /* %pf prints the function the pointer refers to. */
+ test_msg("%pf failed\n", tests[i]);
+ return ret;
+ }
+ }
+
+ return 0;
+}
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 054fc0d97..e2d3da02d 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -100,7 +100,7 @@ static void insert_inode_item_key(struct btrfs_root *root)
static void setup_file_extents(struct btrfs_root *root)
{
int slot = 0;
- u64 disk_bytenr = 1 * 1024 * 1024;
+ u64 disk_bytenr = SZ_1M;
u64 offset = 0;
/* First we want a hole */
@@ -974,7 +974,7 @@ static int test_extent_accounting(void)
(BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
EXTENT_DELALLOC | EXTENT_DIRTY |
EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
- NULL, GFP_NOFS);
+ NULL, GFP_KERNEL);
if (ret) {
test_msg("clear_extent_bit returned %d\n", ret);
goto out;
@@ -1045,7 +1045,7 @@ static int test_extent_accounting(void)
BTRFS_MAX_EXTENT_SIZE+8191,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
- NULL, GFP_NOFS);
+ NULL, GFP_KERNEL);
if (ret) {
test_msg("clear_extent_bit returned %d\n", ret);
goto out;
@@ -1079,7 +1079,7 @@ static int test_extent_accounting(void)
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
- NULL, GFP_NOFS);
+ NULL, GFP_KERNEL);
if (ret) {
test_msg("clear_extent_bit returned %d\n", ret);
goto out;
@@ -1096,7 +1096,7 @@ out:
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
- NULL, GFP_NOFS);
+ NULL, GFP_KERNEL);
iput(inode);
btrfs_free_dummy_root(root);
return ret;
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index 846d277b1..8ea5d34bc 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -23,14 +23,6 @@
#include "../qgroup.h"
#include "../backref.h"
-static void init_dummy_trans(struct btrfs_trans_handle *trans)
-{
- memset(trans, 0, sizeof(*trans));
- trans->transid = 1;
- INIT_LIST_HEAD(&trans->qgroup_ref_list);
- trans->type = __TRANS_DUMMY;
-}
-
static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
u64 num_bytes, u64 parent, u64 root_objectid)
{
@@ -44,7 +36,7 @@ static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
u32 size = sizeof(*item) + sizeof(*iref) + sizeof(*block_info);
int ret;
- init_dummy_trans(&trans);
+ btrfs_init_dummy_trans(&trans);
ins.objectid = bytenr;
ins.type = BTRFS_EXTENT_ITEM_KEY;
@@ -94,7 +86,7 @@ static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
u64 refs;
int ret;
- init_dummy_trans(&trans);
+ btrfs_init_dummy_trans(&trans);
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -144,7 +136,7 @@ static int remove_extent_item(struct btrfs_root *root, u64 bytenr,
struct btrfs_path *path;
int ret;
- init_dummy_trans(&trans);
+ btrfs_init_dummy_trans(&trans);
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -178,7 +170,7 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
u64 refs;
int ret;
- init_dummy_trans(&trans);
+ btrfs_init_dummy_trans(&trans);
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -232,7 +224,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
struct ulist *new_roots = NULL;
int ret;
- init_dummy_trans(&trans);
+ btrfs_init_dummy_trans(&trans);
test_msg("Qgroup basic add\n");
ret = btrfs_create_qgroup(NULL, fs_info, 5);
@@ -326,7 +318,7 @@ static int test_multiple_refs(struct btrfs_root *root)
struct ulist *new_roots = NULL;
int ret;
- init_dummy_trans(&trans);
+ btrfs_init_dummy_trans(&trans);
test_msg("Qgroup multiple refs test\n");
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index be8eae80f..b6031ce47 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -75,6 +75,23 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
list_del_init(&em->list);
free_extent_map(em);
}
+ /*
+ * If any block groups are found in ->deleted_bgs then it's
+ * because the transaction was aborted and a commit did not
+ * happen (things failed before writing the new superblock
+ * and calling btrfs_finish_extent_commit()), so we can not
+ * discard the physical locations of the block groups.
+ */
+ while (!list_empty(&transaction->deleted_bgs)) {
+ struct btrfs_block_group_cache *cache;
+
+ cache = list_first_entry(&transaction->deleted_bgs,
+ struct btrfs_block_group_cache,
+ bg_list);
+ list_del_init(&cache->bg_list);
+ btrfs_put_block_group_trimming(cache);
+ btrfs_put_block_group(cache);
+ }
kmem_cache_free(btrfs_transaction_cachep, transaction);
}
}
@@ -634,17 +651,20 @@ struct btrfs_trans_handle *btrfs_start_transaction_lflush(
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
{
- return start_transaction(root, 0, TRANS_JOIN, 0);
+ return start_transaction(root, 0, TRANS_JOIN,
+ BTRFS_RESERVE_NO_FLUSH);
}
struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
{
- return start_transaction(root, 0, TRANS_JOIN_NOLOCK, 0);
+ return start_transaction(root, 0, TRANS_JOIN_NOLOCK,
+ BTRFS_RESERVE_NO_FLUSH);
}
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
{
- return start_transaction(root, 0, TRANS_USERSPACE, 0);
+ return start_transaction(root, 0, TRANS_USERSPACE,
+ BTRFS_RESERVE_NO_FLUSH);
}
/*
@@ -662,7 +682,8 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root
*/
struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root)
{
- return start_transaction(root, 0, TRANS_ATTACH, 0);
+ return start_transaction(root, 0, TRANS_ATTACH,
+ BTRFS_RESERVE_NO_FLUSH);
}
/*
@@ -677,7 +698,8 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root)
{
struct btrfs_trans_handle *trans;
- trans = start_transaction(root, 0, TRANS_ATTACH, 0);
+ trans = start_transaction(root, 0, TRANS_ATTACH,
+ BTRFS_RESERVE_NO_FLUSH);
if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT)
btrfs_wait_for_commit(root, 0);
@@ -1319,17 +1341,11 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
u64 root_flags;
uuid_le new_uuid;
- path = btrfs_alloc_path();
- if (!path) {
- pending->error = -ENOMEM;
- return 0;
- }
+ ASSERT(pending->path);
+ path = pending->path;
- new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
- if (!new_root_item) {
- pending->error = -ENOMEM;
- goto root_item_alloc_fail;
- }
+ ASSERT(pending->root_item);
+ new_root_item = pending->root_item;
pending->error = btrfs_find_free_objectid(tree_root, &objectid);
if (pending->error)
@@ -1562,8 +1578,10 @@ clear_skip_qgroup:
btrfs_clear_skip_qgroup(trans);
no_free_objectid:
kfree(new_root_item);
-root_item_alloc_fail:
+ pending->root_item = NULL;
btrfs_free_path(path);
+ pending->path = NULL;
+
return ret;
}
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 64c8221b6..72be51f7c 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -137,8 +137,10 @@ struct btrfs_pending_snapshot {
struct dentry *dentry;
struct inode *dir;
struct btrfs_root *root;
+ struct btrfs_root_item *root_item;
struct btrfs_root *snap;
struct btrfs_qgroup_inherit *inherit;
+ struct btrfs_path *path;
/* block reservation for the operation */
struct btrfs_block_rsv block_rsv;
u64 qgroup_reserved;
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index f31db4325..cb6508912 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -89,6 +89,12 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
goto out;
}
btrfs_release_path(path);
+ /*
+ * We don't need a lock on a leaf. btrfs_realloc_node() will lock all
+ * leafs from path->nodes[1], so set lowest_level to 1 to avoid later
+ * a deadlock (attempting to write lock an already write locked leaf).
+ */
+ path->lowest_level = 1;
wret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (wret < 0) {
@@ -99,9 +105,12 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
ret = 0;
goto out;
}
- path->slots[1] = btrfs_header_nritems(path->nodes[1]);
- next_key_ret = btrfs_find_next_key(root, path, &key, 1,
- min_trans);
+ /*
+ * The node at level 1 must always be locked when our path has
+ * keep_locks set and lowest_level is 1, regardless of the value of
+ * path->slots[1].
+ */
+ BUG_ON(path->locks[1] == 0);
ret = btrfs_realloc_node(trans, root,
path->nodes[1], 0,
&last_ret,
@@ -110,6 +119,18 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
WARN_ON(ret == -EAGAIN);
goto out;
}
+ /*
+ * Now that we reallocated the node we can find the next key. Note that
+ * btrfs_find_next_key() can release our path and do another search
+ * without COWing, this is because even with path->keep_locks = 1,
+ * btrfs_search_slot() / ctree.c:unlock_up() does not keep a lock on a
+ * node when path->slots[node_level - 1] does not point to the last
+ * item or a slot beyond the last item (ctree.c:unlock_up()). Therefore
+ * we search for the next key after reallocating our node.
+ */
+ path->slots[1] = btrfs_header_nritems(path->nodes[1]);
+ next_key_ret = btrfs_find_next_key(root, path, &key, 1,
+ min_trans);
if (next_key_ret == 0) {
memcpy(&root->defrag_progress, &key, sizeof(key));
ret = -EAGAIN;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 323e12cc9..978c3a810 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4127,7 +4127,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
struct inode *inode,
struct btrfs_path *path,
struct list_head *logged_list,
- struct btrfs_log_ctx *ctx)
+ struct btrfs_log_ctx *ctx,
+ const u64 start,
+ const u64 end)
{
struct extent_map *em, *n;
struct list_head extents;
@@ -4166,7 +4168,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
}
list_sort(NULL, &extents, extent_cmp);
-
+ /*
+ * Collect any new ordered extents within the range. This is to
+ * prevent logging file extent items without waiting for the disk
+ * location they point to being written. We do this only to deal
+ * with races against concurrent lockless direct IO writes.
+ */
+ btrfs_get_logged_extents(inode, logged_list, start, end);
process:
while (!list_empty(&extents)) {
em = list_entry(extents.next, struct extent_map, list);
@@ -4701,7 +4709,7 @@ log_extents:
goto out_unlock;
}
ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
- &logged_list, ctx);
+ &logged_list, ctx, start, end);
if (ret) {
err = ret;
goto out_unlock;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9c62a6f97..366b33594 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -108,7 +108,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
},
};
-const u64 const btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
+const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
[BTRFS_RAID_RAID10] = BTRFS_BLOCK_GROUP_RAID10,
[BTRFS_RAID_RAID1] = BTRFS_BLOCK_GROUP_RAID1,
[BTRFS_RAID_DUP] = BTRFS_BLOCK_GROUP_DUP,
@@ -125,6 +125,7 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
+static void btrfs_close_one_device(struct btrfs_device *device);
DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);
@@ -1103,7 +1104,7 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- path->reada = 2;
+ path->reada = READA_FORWARD;
key.objectid = device->devid;
key.offset = start;
@@ -1183,7 +1184,7 @@ again:
struct map_lookup *map;
int i;
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++) {
u64 end;
@@ -1281,7 +1282,7 @@ again:
goto out;
}
- path->reada = 2;
+ path->reada = READA_FORWARD;
path->search_commit_root = 1;
path->skip_locking = 1;
@@ -1643,7 +1644,6 @@ static void update_dev_time(char *path_name)
return;
file_update_time(filp);
filp_close(filp, NULL);
- return;
}
static int btrfs_rm_dev_item(struct btrfs_root *root,
@@ -2756,7 +2756,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
free_extent_map(em);
return -EINVAL;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
lock_chunks(root->fs_info->chunk_root);
check_system_chunk(trans, extent_root, map->type);
unlock_chunks(root->fs_info->chunk_root);
@@ -3407,7 +3407,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
list_for_each_entry(device, devices, dev_list) {
old_size = btrfs_device_get_total_bytes(device);
size_to_free = div_factor(old_size, 1);
- size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
+ size_to_free = min_t(u64, size_to_free, SZ_1M);
if (!device->writeable ||
btrfs_device_get_total_bytes(device) -
btrfs_device_get_bytes_used(device) > size_to_free ||
@@ -3724,14 +3724,6 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
goto out;
}
- /* allow dup'ed data chunks only in mixed mode */
- if (!mixed && (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
- (bctl->data.target & BTRFS_BLOCK_GROUP_DUP)) {
- btrfs_err(fs_info, "dup for data is not allowed");
- ret = -EINVAL;
- goto out;
- }
-
/* allow to reduce meta or sys integrity only if force set */
allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID10 |
@@ -3757,6 +3749,13 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
}
} while (read_seqretry(&fs_info->profiles_lock, seq));
+ if (btrfs_get_num_tolerated_disk_barrier_failures(bctl->meta.target) <
+ btrfs_get_num_tolerated_disk_barrier_failures(bctl->data.target)) {
+ btrfs_warn(fs_info,
+ "metadata profile 0x%llx has lower redundancy than data profile 0x%llx",
+ bctl->meta.target, bctl->data.target);
+ }
+
if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
fs_info->num_tolerated_disk_barrier_failures = min(
btrfs_calc_num_tolerated_disk_barrier_failures(fs_info),
@@ -4269,7 +4268,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
if (!path)
return -ENOMEM;
- path->reada = 2;
+ path->reada = READA_FORWARD;
lock_chunks(root);
@@ -4461,7 +4460,7 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target)
{
/* TODO allow them to set a preferred stripe size */
- return 64 * 1024;
+ return SZ_64K;
}
static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
@@ -4529,21 +4528,21 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
ncopies = btrfs_raid_array[index].ncopies;
if (type & BTRFS_BLOCK_GROUP_DATA) {
- max_stripe_size = 1024 * 1024 * 1024;
+ max_stripe_size = SZ_1G;
max_chunk_size = 10 * max_stripe_size;
if (!devs_max)
devs_max = BTRFS_MAX_DEVS(info->chunk_root);
} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
/* for larger filesystems, use larger metadata chunks */
- if (fs_devices->total_rw_bytes > 50ULL * 1024 * 1024 * 1024)
- max_stripe_size = 1024 * 1024 * 1024;
+ if (fs_devices->total_rw_bytes > 50ULL * SZ_1G)
+ max_stripe_size = SZ_1G;
else
- max_stripe_size = 256 * 1024 * 1024;
+ max_stripe_size = SZ_256M;
max_chunk_size = max_stripe_size;
if (!devs_max)
devs_max = BTRFS_MAX_DEVS(info->chunk_root);
} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
- max_stripe_size = 32 * 1024 * 1024;
+ max_stripe_size = SZ_32M;
max_chunk_size = 2 * max_stripe_size;
if (!devs_max)
devs_max = BTRFS_MAX_DEVS_SYS_CHUNK;
@@ -4720,7 +4719,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
goto error;
}
set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
- em->bdev = (struct block_device *)map;
+ em->map_lookup = map;
em->start = start;
em->len = num_bytes;
em->block_start = 0;
@@ -4794,7 +4793,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
u64 dev_offset;
u64 stripe_size;
int i = 0;
- int ret;
+ int ret = 0;
em_tree = &extent_root->fs_info->mapping_tree.map_tree;
read_lock(&em_tree->lock);
@@ -4815,7 +4814,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
return -EINVAL;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
item_size = btrfs_chunk_item_size(map->num_stripes);
stripe_size = em->orig_block_len;
@@ -4825,20 +4824,32 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
goto out;
}
+ /*
+ * Take the device list mutex to prevent races with the final phase of
+ * a device replace operation that replaces the device object associated
+ * with the map's stripes, because the device object's id can change
+ * at any time during that final phase of the device replace operation
+ * (dev-replace.c:btrfs_dev_replace_finishing()).
+ */
+ mutex_lock(&chunk_root->fs_info->fs_devices->device_list_mutex);
for (i = 0; i < map->num_stripes; i++) {
device = map->stripes[i].dev;
dev_offset = map->stripes[i].physical;
ret = btrfs_update_device(trans, device);
if (ret)
- goto out;
+ break;
ret = btrfs_alloc_dev_extent(trans, device,
chunk_root->root_key.objectid,
BTRFS_FIRST_CHUNK_TREE_OBJECTID,
chunk_offset, dev_offset,
stripe_size);
if (ret)
- goto out;
+ break;
+ }
+ if (ret) {
+ mutex_unlock(&chunk_root->fs_info->fs_devices->device_list_mutex);
+ goto out;
}
stripe = &chunk->stripe;
@@ -4851,6 +4862,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
stripe++;
}
+ mutex_unlock(&chunk_root->fs_info->fs_devices->device_list_mutex);
btrfs_set_stack_chunk_length(chunk, chunk_size);
btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
@@ -4957,7 +4969,7 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
if (!em)
return 1;
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++) {
if (map->stripes[i].dev->missing) {
miss_ndevs++;
@@ -5037,7 +5049,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
return 1;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
ret = map->num_stripes;
else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
@@ -5073,7 +5085,7 @@ unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
BUG_ON(!em);
BUG_ON(em->start > logical || em->start + em->len < logical);
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
len = map->stripe_len * nr_data_stripes(map);
free_extent_map(em);
@@ -5094,7 +5106,7 @@ int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
BUG_ON(!em);
BUG_ON(em->start > logical || em->start + em->len < logical);
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
ret = 1;
free_extent_map(em);
@@ -5253,7 +5265,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
return -EINVAL;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
offset = logical - em->start;
stripe_len = map->stripe_len;
@@ -5367,35 +5379,33 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
* target drive.
*/
for (i = 0; i < tmp_num_stripes; i++) {
- if (tmp_bbio->stripes[i].dev->devid == srcdev_devid) {
- /*
- * In case of DUP, in order to keep it
- * simple, only add the mirror with the
- * lowest physical address
- */
- if (found &&
- physical_of_found <=
- tmp_bbio->stripes[i].physical)
- continue;
- index_srcdev = i;
- found = 1;
- physical_of_found =
- tmp_bbio->stripes[i].physical;
- }
+ if (tmp_bbio->stripes[i].dev->devid != srcdev_devid)
+ continue;
+
+ /*
+ * In case of DUP, in order to keep it simple, only add
+ * the mirror with the lowest physical address
+ */
+ if (found &&
+ physical_of_found <= tmp_bbio->stripes[i].physical)
+ continue;
+
+ index_srcdev = i;
+ found = 1;
+ physical_of_found = tmp_bbio->stripes[i].physical;
}
- if (found) {
- mirror_num = index_srcdev + 1;
- patch_the_first_stripe_for_dev_replace = 1;
- physical_to_patch_in_first_stripe = physical_of_found;
- } else {
+ btrfs_put_bbio(tmp_bbio);
+
+ if (!found) {
WARN_ON(1);
ret = -EIO;
- btrfs_put_bbio(tmp_bbio);
goto out;
}
- btrfs_put_bbio(tmp_bbio);
+ mirror_num = index_srcdev + 1;
+ patch_the_first_stripe_for_dev_replace = 1;
+ physical_to_patch_in_first_stripe = physical_of_found;
} else if (mirror_num > map->num_stripes) {
mirror_num = 0;
}
@@ -5795,7 +5805,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
free_extent_map(em);
return -EIO;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
length = em->len;
rmap_len = map->stripe_len;
@@ -6058,7 +6068,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
bbio->fs_info = root->fs_info;
atomic_set(&bbio->stripes_pending, bbio->num_stripes);
- if (bbio->raid_map) {
+ if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
+ ((rw & WRITE) || (mirror_num > 1))) {
/* In this case, map_length has been set to the length of
a single stripe; not the whole write */
if (rw & WRITE) {
@@ -6199,6 +6210,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
struct extent_map *em;
u64 logical;
u64 length;
+ u64 stripe_len;
u64 devid;
u8 uuid[BTRFS_UUID_SIZE];
int num_stripes;
@@ -6207,6 +6219,37 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
logical = key->offset;
length = btrfs_chunk_length(leaf, chunk);
+ stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+ num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+ /* Validation check */
+ if (!num_stripes) {
+ btrfs_err(root->fs_info, "invalid chunk num_stripes: %u",
+ num_stripes);
+ return -EIO;
+ }
+ if (!IS_ALIGNED(logical, root->sectorsize)) {
+ btrfs_err(root->fs_info,
+ "invalid chunk logical %llu", logical);
+ return -EIO;
+ }
+ if (!length || !IS_ALIGNED(length, root->sectorsize)) {
+ btrfs_err(root->fs_info,
+ "invalid chunk length %llu", length);
+ return -EIO;
+ }
+ if (!is_power_of_2(stripe_len)) {
+ btrfs_err(root->fs_info, "invalid chunk stripe length: %llu",
+ stripe_len);
+ return -EIO;
+ }
+ if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+ btrfs_chunk_type(leaf, chunk)) {
+ btrfs_err(root->fs_info, "unrecognized chunk type: %llu",
+ ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
+ BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+ btrfs_chunk_type(leaf, chunk));
+ return -EIO;
+ }
read_lock(&map_tree->map_tree.lock);
em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
@@ -6223,7 +6266,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
em = alloc_extent_map();
if (!em)
return -ENOMEM;
- num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
if (!map) {
free_extent_map(em);
@@ -6231,7 +6273,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
}
set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
- em->bdev = (struct block_device *)map;
+ em->map_lookup = map;
em->start = logical;
em->len = length;
em->orig_start = 0;
@@ -6466,11 +6508,11 @@ int btrfs_read_sys_array(struct btrfs_root *root)
sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET);
if (!sb)
return -ENOMEM;
- btrfs_set_buffer_uptodate(sb);
+ set_extent_buffer_uptodate(sb);
btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
/*
* The sb extent buffer is artifical and just used to read the system array.
- * btrfs_set_buffer_uptodate() call does not properly mark all it's
+ * set_extent_buffer_uptodate() call does not properly mark all its
* pages up-to-date when the page is larger: extent does not cover the
* whole page and consequently check_page_uptodate does not find all
* the page's extents up-to-date (the hole beyond sb),
@@ -6529,6 +6571,9 @@ int btrfs_read_sys_array(struct btrfs_root *root)
if (ret)
break;
} else {
+ printk(KERN_ERR
+ "BTRFS: unexpected item type %u in sys_array at offset %u\n",
+ (u32)key.type, cur_offset);
ret = -EIO;
break;
}
@@ -6930,7 +6975,7 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_root *root,
/* In order to kick the device replace finish process */
lock_chunks(root);
list_for_each_entry(em, &transaction->pending_chunks, list) {
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++) {
dev = map->stripes[i].dev;
@@ -6958,7 +7003,7 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
}
}
-void btrfs_close_one_device(struct btrfs_device *device)
+static void btrfs_close_one_device(struct btrfs_device *device)
{
struct btrfs_fs_devices *fs_devices = device->fs_devices;
struct btrfs_device *new_device;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index d5c84f6b1..1939ebde6 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -26,7 +26,7 @@
extern struct mutex uuid_mutex;
-#define BTRFS_STRIPE_LEN (64 * 1024)
+#define BTRFS_STRIPE_LEN SZ_64K
struct buffer_head;
struct btrfs_pending_bios {
@@ -566,6 +566,5 @@ static inline void unlock_chunks(struct btrfs_root *root)
struct list_head *btrfs_get_fs_uuids(void);
void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
-void btrfs_close_one_device(struct btrfs_device *device);
#endif
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 1fcd7b6e7..6c68d6356 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -126,7 +126,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
* locks the inode's i_mutex before calling setxattr or removexattr.
*/
if (flags & XATTR_REPLACE) {
- ASSERT(mutex_is_locked(&inode->i_mutex));
+ ASSERT(inode_is_locked(inode));
di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
name, name_len, 0);
if (!di)
@@ -283,7 +283,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- path->reada = 2;
+ path->reada = READA_FORWARD;
/* search for our xattrs */
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -351,137 +351,89 @@ err:
return ret;
}
-/*
- * List of handlers for synthetic system.* attributes. All real ondisk
- * attributes are handled directly.
- */
-const struct xattr_handler *btrfs_xattr_handlers[] = {
-#ifdef CONFIG_BTRFS_FS_POSIX_ACL
- &posix_acl_access_xattr_handler,
- &posix_acl_default_xattr_handler,
-#endif
- NULL,
-};
-
-/*
- * Check if the attribute is in a supported namespace.
- *
- * This is applied after the check for the synthetic attributes in the system
- * namespace.
- */
-static int btrfs_is_valid_xattr(const char *name)
+static int btrfs_xattr_handler_get(const struct xattr_handler *handler,
+ struct dentry *dentry, const char *name,
+ void *buffer, size_t size)
{
- int len = strlen(name);
- int prefixlen = 0;
-
- if (!strncmp(name, XATTR_SECURITY_PREFIX,
- XATTR_SECURITY_PREFIX_LEN))
- prefixlen = XATTR_SECURITY_PREFIX_LEN;
- else if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- prefixlen = XATTR_SYSTEM_PREFIX_LEN;
- else if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
- prefixlen = XATTR_TRUSTED_PREFIX_LEN;
- else if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
- prefixlen = XATTR_USER_PREFIX_LEN;
- else if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
- prefixlen = XATTR_BTRFS_PREFIX_LEN;
- else
- return -EOPNOTSUPP;
-
- /*
- * The name cannot consist of just prefix
- */
- if (len <= prefixlen)
- return -EINVAL;
+ struct inode *inode = d_inode(dentry);
- return 0;
+ name = xattr_full_name(handler, name);
+ return __btrfs_getxattr(inode, name, buffer, size);
}
-ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
- void *buffer, size_t size)
+static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
+ struct dentry *dentry, const char *name,
+ const void *buffer, size_t size,
+ int flags)
{
- int ret;
+ struct inode *inode = d_inode(dentry);
- /*
- * If this is a request for a synthetic attribute in the system.*
- * namespace use the generic infrastructure to resolve a handler
- * for it via sb->s_xattr.
- */
- if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- return generic_getxattr(dentry, name, buffer, size);
+ name = xattr_full_name(handler, name);
+ return __btrfs_setxattr(NULL, inode, name, buffer, size, flags);
+}
- ret = btrfs_is_valid_xattr(name);
- if (ret)
- return ret;
- return __btrfs_getxattr(d_inode(dentry), name, buffer, size);
+static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
+ struct dentry *dentry,
+ const char *name, const void *value,
+ size_t size, int flags)
+{
+ name = xattr_full_name(handler, name);
+ return btrfs_set_prop(d_inode(dentry), name, value, size, flags);
}
+static const struct xattr_handler btrfs_security_xattr_handler = {
+ .prefix = XATTR_SECURITY_PREFIX,
+ .get = btrfs_xattr_handler_get,
+ .set = btrfs_xattr_handler_set,
+};
+
+static const struct xattr_handler btrfs_trusted_xattr_handler = {
+ .prefix = XATTR_TRUSTED_PREFIX,
+ .get = btrfs_xattr_handler_get,
+ .set = btrfs_xattr_handler_set,
+};
+
+static const struct xattr_handler btrfs_user_xattr_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .get = btrfs_xattr_handler_get,
+ .set = btrfs_xattr_handler_set,
+};
+
+static const struct xattr_handler btrfs_btrfs_xattr_handler = {
+ .prefix = XATTR_BTRFS_PREFIX,
+ .get = btrfs_xattr_handler_get,
+ .set = btrfs_xattr_handler_set_prop,
+};
+
+const struct xattr_handler *btrfs_xattr_handlers[] = {
+ &btrfs_security_xattr_handler,
+#ifdef CONFIG_BTRFS_FS_POSIX_ACL
+ &posix_acl_access_xattr_handler,
+ &posix_acl_default_xattr_handler,
+#endif
+ &btrfs_trusted_xattr_handler,
+ &btrfs_user_xattr_handler,
+ &btrfs_btrfs_xattr_handler,
+ NULL,
+};
+
int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
size_t size, int flags)
{
struct btrfs_root *root = BTRFS_I(d_inode(dentry))->root;
- int ret;
- /*
- * The permission on security.* and system.* is not checked
- * in permission().
- */
if (btrfs_root_readonly(root))
return -EROFS;
-
- /*
- * If this is a request for a synthetic attribute in the system.*
- * namespace use the generic infrastructure to resolve a handler
- * for it via sb->s_xattr.
- */
- if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- return generic_setxattr(dentry, name, value, size, flags);
-
- ret = btrfs_is_valid_xattr(name);
- if (ret)
- return ret;
-
- if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
- return btrfs_set_prop(d_inode(dentry), name,
- value, size, flags);
-
- if (size == 0)
- value = ""; /* empty EA, do not remove */
-
- return __btrfs_setxattr(NULL, d_inode(dentry), name, value, size,
- flags);
+ return generic_setxattr(dentry, name, value, size, flags);
}
int btrfs_removexattr(struct dentry *dentry, const char *name)
{
struct btrfs_root *root = BTRFS_I(d_inode(dentry))->root;
- int ret;
- /*
- * The permission on security.* and system.* is not checked
- * in permission().
- */
if (btrfs_root_readonly(root))
return -EROFS;
-
- /*
- * If this is a request for a synthetic attribute in the system.*
- * namespace use the generic infrastructure to resolve a handler
- * for it via sb->s_xattr.
- */
- if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- return generic_removexattr(dentry, name);
-
- ret = btrfs_is_valid_xattr(name);
- if (ret)
- return ret;
-
- if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
- return btrfs_set_prop(d_inode(dentry), name,
- NULL, 0, XATTR_REPLACE);
-
- return __btrfs_setxattr(NULL, d_inode(dentry), name, NULL, 0,
- XATTR_REPLACE);
+ return generic_removexattr(dentry, name);
}
static int btrfs_initxattrs(struct inode *inode,
@@ -494,7 +446,7 @@ static int btrfs_initxattrs(struct inode *inode,
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
name = kmalloc(XATTR_SECURITY_PREFIX_LEN +
- strlen(xattr->name) + 1, GFP_NOFS);
+ strlen(xattr->name) + 1, GFP_KERNEL);
if (!name) {
err = -ENOMEM;
break;
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index 5049608d1..96807b3d2 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -28,8 +28,6 @@ extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
extern int __btrfs_setxattr(struct btrfs_trans_handle *trans,
struct inode *inode, const char *name,
const void *value, size_t size, int flags);
-extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
- void *buffer, size_t size);
extern int btrfs_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags);
extern int btrfs_removexattr(struct dentry *dentry, const char *name);
diff --git a/fs/buffer.c b/fs/buffer.c
index 4f4cd959d..e1632abb4 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -134,13 +134,10 @@ __clear_page_buffers(struct page *page)
static void buffer_io_error(struct buffer_head *bh, char *msg)
{
- char b[BDEVNAME_SIZE];
-
if (!test_bit(BH_Quiet, &bh->b_state))
printk_ratelimited(KERN_ERR
- "Buffer I/O error on dev %s, logical block %llu%s\n",
- bdevname(bh->b_bdev, b),
- (unsigned long long)bh->b_blocknr, msg);
+ "Buffer I/O error on dev %pg, logical block %llu%s\n",
+ bh->b_bdev, (unsigned long long)bh->b_blocknr, msg);
}
/*
@@ -237,15 +234,13 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
* elsewhere, don't buffer_error if we had some unmapped buffers
*/
if (all_mapped) {
- char b[BDEVNAME_SIZE];
-
printk("__find_get_block_slow() failed. "
"block=%llu, b_blocknr=%llu\n",
(unsigned long long)block,
(unsigned long long)bh->b_blocknr);
printk("b_state=0x%08lx, b_size=%zu\n",
bh->b_state, bh->b_size);
- printk("device %s blocksize: %d\n", bdevname(bdev, b),
+ printk("device %pg blocksize: %d\n", bdev,
1 << bd_inode->i_blkbits);
}
out_unlock:
@@ -531,10 +526,8 @@ repeat:
static void do_thaw_one(struct super_block *sb, void *unused)
{
- char b[BDEVNAME_SIZE];
while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
- printk(KERN_WARNING "Emergency Thaw on %s\n",
- bdevname(sb->s_bdev, b));
+ printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev);
}
static void do_thaw_all(struct work_struct *work)
@@ -1074,12 +1067,10 @@ grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
* pagecache index. (this comparison is done using sector_t types).
*/
if (unlikely(index != block >> sizebits)) {
- char b[BDEVNAME_SIZE];
-
printk(KERN_ERR "%s: requested out-of-range block %llu for "
- "device %s\n",
+ "device %pg\n",
__func__, (unsigned long long)block,
- bdevname(bdev, b));
+ bdev);
return -EIO;
}
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index f601def05..452e98dd7 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -226,15 +226,9 @@ static ssize_t cachefiles_daemon_write(struct file *file,
return -EOPNOTSUPP;
/* drag the command string into the kernel so we can parse it */
- data = kmalloc(datalen + 1, GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- ret = -EFAULT;
- if (copy_from_user(data, _data, datalen) != 0)
- goto error;
-
- data[datalen] = '\0';
+ data = memdup_user_nul(_data, datalen);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
ret = -EINVAL;
if (memchr(data, '\0', datalen))
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index afa023dde..675a3332d 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -446,7 +446,7 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
return 0;
cachefiles_begin_secure(cache, &saved_cred);
- mutex_lock(&d_inode(object->backer)->i_mutex);
+ inode_lock(d_inode(object->backer));
/* if there's an extension to a partial page at the end of the backing
* file, we need to discard the partial page so that we pick up new
@@ -465,7 +465,7 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
ret = notify_change(object->backer, &newattrs, NULL);
truncate_failed:
- mutex_unlock(&d_inode(object->backer)->i_mutex);
+ inode_unlock(d_inode(object->backer));
cachefiles_end_secure(cache, saved_cred);
if (ret == -EIO) {
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index c4b893453..1c2334c16 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -295,7 +295,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
cachefiles_mark_object_buried(cache, rep, why);
}
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
if (ret == -EIO)
cachefiles_io_error(cache, "Unlink failed");
@@ -306,7 +306,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
/* directories have to be moved to the graveyard */
_debug("move stale object to graveyard");
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
try_again:
/* first step is to make up a grave dentry in the graveyard */
@@ -423,13 +423,13 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
dir = dget_parent(object->dentry);
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
if (test_bit(FSCACHE_OBJECT_KILLED_BY_CACHE, &object->fscache.flags)) {
/* object allocation for the same key preemptively deleted this
* object's file so that it could create its own file */
_debug("object preemptively buried");
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
ret = 0;
} else {
/* we need to check that our parent is _still_ our parent - it
@@ -442,7 +442,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
/* it got moved, presumably by cachefilesd culling it,
* so it's no longer in the key path and we can ignore
* it */
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
ret = 0;
}
}
@@ -501,7 +501,7 @@ lookup_again:
/* search the current directory for the element name */
_debug("lookup '%s'", name);
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
start = jiffies;
next = lookup_one_len(name, dir, nlen);
@@ -585,7 +585,7 @@ lookup_again:
/* process the next component */
if (key) {
_debug("advance");
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(dir);
dir = next;
next = NULL;
@@ -623,7 +623,7 @@ lookup_again:
/* note that we're now using this object */
ret = cachefiles_mark_object_active(cache, object);
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(dir);
dir = NULL;
@@ -705,7 +705,7 @@ lookup_error:
cachefiles_io_error(cache, "Lookup failed");
next = NULL;
error:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(next);
error_out2:
dput(dir);
@@ -729,7 +729,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
_enter(",,%s", dirname);
/* search the current directory for the element name */
- mutex_lock(&d_inode(dir)->i_mutex);
+ inode_lock(d_inode(dir));
start = jiffies;
subdir = lookup_one_len(dirname, dir, strlen(dirname));
@@ -768,7 +768,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
d_backing_inode(subdir)->i_ino);
}
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
/* we need to make sure the subdir is a directory */
ASSERT(d_backing_inode(subdir));
@@ -800,19 +800,19 @@ check_error:
return ERR_PTR(ret);
mkdir_error:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(subdir);
pr_err("mkdir %s failed with error %d\n", dirname, ret);
return ERR_PTR(ret);
lookup_error:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
ret = PTR_ERR(subdir);
pr_err("Lookup %s failed with error %d\n", dirname, ret);
return ERR_PTR(ret);
nomem_d_alloc:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
_leave(" = -ENOMEM");
return ERR_PTR(-ENOMEM);
}
@@ -837,7 +837,7 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
// dir, filename);
/* look up the victim */
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
start = jiffies;
victim = lookup_one_len(filename, dir, strlen(filename));
@@ -852,7 +852,7 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
* at the netfs's request whilst the cull was in progress
*/
if (d_is_negative(victim)) {
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(victim);
_leave(" = -ENOENT [absent]");
return ERR_PTR(-ENOENT);
@@ -881,13 +881,13 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
object_in_use:
read_unlock(&cache->active_lock);
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(victim);
//_leave(" = -EBUSY [in use]");
return ERR_PTR(-EBUSY);
lookup_error:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
ret = PTR_ERR(victim);
if (ret == -ENOENT) {
/* file or dir now absent - probably retired by netfs */
@@ -947,7 +947,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
return 0;
error_unlock:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
error:
dput(victim);
if (ret == -ENOENT) {
@@ -982,7 +982,7 @@ int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir,
if (IS_ERR(victim))
return PTR_ERR(victim);
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(victim);
//_leave(" = 0");
return 0;
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 8f84646f1..f19708487 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -49,10 +49,10 @@ struct posix_acl *ceph_get_acl(struct inode *inode, int type)
switch (type) {
case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
- name = POSIX_ACL_XATTR_DEFAULT;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
BUG();
@@ -92,7 +92,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
switch (type) {
case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
ret = posix_acl_equiv_mode(acl, &new_mode);
if (ret < 0)
@@ -106,7 +106,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
ret = acl ? -EINVAL : 0;
goto out;
}
- name = POSIX_ACL_XATTR_DEFAULT;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
ret = -EINVAL;
@@ -202,11 +202,11 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
ceph_pagelist_encode_32(pagelist, acl && default_acl ? 2 : 1);
if (acl) {
- size_t len = strlen(POSIX_ACL_XATTR_ACCESS);
+ size_t len = strlen(XATTR_NAME_POSIX_ACL_ACCESS);
err = ceph_pagelist_reserve(pagelist, len + val_size1 + 8);
if (err)
goto out_err;
- ceph_pagelist_encode_string(pagelist, POSIX_ACL_XATTR_ACCESS,
+ ceph_pagelist_encode_string(pagelist, XATTR_NAME_POSIX_ACL_ACCESS,
len);
err = posix_acl_to_xattr(&init_user_ns, acl,
tmp_buf, val_size1);
@@ -216,12 +216,12 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
ceph_pagelist_append(pagelist, tmp_buf, val_size1);
}
if (default_acl) {
- size_t len = strlen(POSIX_ACL_XATTR_DEFAULT);
+ size_t len = strlen(XATTR_NAME_POSIX_ACL_DEFAULT);
err = ceph_pagelist_reserve(pagelist, len + val_size2 + 8);
if (err)
goto out_err;
err = ceph_pagelist_encode_string(pagelist,
- POSIX_ACL_XATTR_DEFAULT, len);
+ XATTR_NAME_POSIX_ACL_DEFAULT, len);
err = posix_acl_to_xattr(&init_user_ns, default_acl,
tmp_buf, val_size2);
if (err < 0)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index b7d218a16..19adeb0ef 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1108,7 +1108,7 @@ retry_locked:
return 0;
/* past end of file? */
- i_size = inode->i_size; /* caller holds i_mutex */
+ i_size = i_size_read(inode);
if (page_off >= i_size ||
(pos_in_page == 0 && (pos+len) >= i_size &&
@@ -1149,7 +1149,6 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
page = grab_cache_page_write_begin(mapping, index, 0);
if (!page)
return -ENOMEM;
- *pagep = page;
dout("write_begin file %p inode %p page %p %d~%d\n", file,
inode, page, (int)pos, (int)len);
@@ -1184,8 +1183,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
zero_user_segment(page, from+copied, len);
/* did file size increase? */
- /* (no need for i_size_read(); we caller holds i_mutex */
- if (pos+copied > inode->i_size)
+ if (pos+copied > i_size_read(inode))
check_cap = ceph_inode_set_size(inode, pos+copied);
if (!PageUptodate(page))
@@ -1378,11 +1376,13 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
ret = VM_FAULT_NOPAGE;
if ((off > size) ||
- (page->mapping != inode->i_mapping))
+ (page->mapping != inode->i_mapping)) {
+ unlock_page(page);
goto out;
+ }
ret = ceph_update_writeable_page(vma->vm_file, off, len, page);
- if (ret == 0) {
+ if (ret >= 0) {
/* success. we'll keep the page locked. */
set_page_dirty(page);
ret = VM_FAULT_LOCKED;
@@ -1393,8 +1393,6 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
ret = VM_FAULT_SIGBUS;
}
out:
- if (ret != VM_FAULT_LOCKED)
- unlock_page(page);
if (ret == VM_FAULT_LOCKED ||
ci->i_inline_version != CEPH_INLINE_NONE) {
int dirty;
@@ -1758,6 +1756,10 @@ int ceph_pool_perm_check(struct ceph_inode_info *ci, int need)
u32 pool;
int ret, flags;
+ /* does not support pool namespace yet */
+ if (ci->i_pool_ns_len)
+ return -EIO;
+
if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode),
NOPOOLPERM))
return 0;
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index a4766ded1..a351480db 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -106,7 +106,7 @@ static uint16_t ceph_fscache_inode_get_aux(const void *cookie_netfs_data,
memset(&aux, 0, sizeof(aux));
aux.mtime = inode->i_mtime;
- aux.size = inode->i_size;
+ aux.size = i_size_read(inode);
memcpy(buffer, &aux, sizeof(aux));
@@ -117,9 +117,7 @@ static void ceph_fscache_inode_get_attr(const void *cookie_netfs_data,
uint64_t *size)
{
const struct ceph_inode_info* ci = cookie_netfs_data;
- const struct inode* inode = &ci->vfs_inode;
-
- *size = inode->i_size;
+ *size = i_size_read(&ci->vfs_inode);
}
static enum fscache_checkaux ceph_fscache_inode_check_aux(
@@ -134,7 +132,7 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux(
memset(&aux, 0, sizeof(aux));
aux.mtime = inode->i_mtime;
- aux.size = inode->i_size;
+ aux.size = i_size_read(inode);
if (memcmp(data, &aux, sizeof(aux)) != 0)
return FSCACHE_CHECKAUX_OBSOLETE;
@@ -197,7 +195,7 @@ void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
return;
/* Avoid multiple racing open requests */
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (ci->fscache)
goto done;
@@ -207,7 +205,7 @@ void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
ci, true);
fscache_check_consistency(ci->fscache);
done:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index c69e1253b..6fe0ad26a 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2030,7 +2030,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
if (datasync)
goto out;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
dirty = try_flush_caps(inode, &flush_tid);
dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
@@ -2046,7 +2046,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
ret = wait_event_interruptible(ci->i_cap_wq,
caps_are_flushed(inode, flush_tid));
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
out:
dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret);
return ret;
@@ -2753,7 +2753,8 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
void *inline_data, int inline_len,
struct ceph_buffer *xattr_buf,
struct ceph_mds_session *session,
- struct ceph_cap *cap, int issued)
+ struct ceph_cap *cap, int issued,
+ u32 pool_ns_len)
__releases(ci->i_ceph_lock)
__releases(mdsc->snap_rwsem)
{
@@ -2873,6 +2874,8 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
/* file layout may have changed */
ci->i_layout = grant->layout;
+ ci->i_pool_ns_len = pool_ns_len;
+
/* size/truncate_seq? */
queue_trunc = ceph_fill_file_size(inode, issued,
le32_to_cpu(grant->truncate_seq),
@@ -3411,6 +3414,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
u32 inline_len = 0;
void *snaptrace;
size_t snaptrace_len;
+ u32 pool_ns_len = 0;
void *p, *end;
dout("handle_caps from mds%d\n", mds);
@@ -3463,6 +3467,21 @@ void ceph_handle_caps(struct ceph_mds_session *session,
p += inline_len;
}
+ if (le16_to_cpu(msg->hdr.version) >= 8) {
+ u64 flush_tid;
+ u32 caller_uid, caller_gid;
+ u32 osd_epoch_barrier;
+ /* version >= 5 */
+ ceph_decode_32_safe(&p, end, osd_epoch_barrier, bad);
+ /* version >= 6 */
+ ceph_decode_64_safe(&p, end, flush_tid, bad);
+ /* version >= 7 */
+ ceph_decode_32_safe(&p, end, caller_uid, bad);
+ ceph_decode_32_safe(&p, end, caller_gid, bad);
+ /* version >= 8 */
+ ceph_decode_32_safe(&p, end, pool_ns_len, bad);
+ }
+
/* lookup ino */
inode = ceph_find_inode(sb, vino);
ci = ceph_inode(inode);
@@ -3518,7 +3537,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
&cap, &issued);
handle_cap_grant(mdsc, inode, h,
inline_version, inline_data, inline_len,
- msg->middle, session, cap, issued);
+ msg->middle, session, cap, issued,
+ pool_ns_len);
if (realm)
ceph_put_snap_realm(mdsc, realm);
goto done_unlocked;
@@ -3542,7 +3562,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
issued |= __ceph_caps_dirty(ci);
handle_cap_grant(mdsc, inode, h,
inline_version, inline_data, inline_len,
- msg->middle, session, cap, issued);
+ msg->middle, session, cap, issued,
+ pool_ns_len);
goto done_unlocked;
case CEPH_CAP_OP_FLUSH_ACK:
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 9314b4ea2..fd11fb231 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -507,7 +507,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
loff_t old_offset = ceph_make_fpos(fi->frag, fi->next_offset);
loff_t retval;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
retval = -EINVAL;
switch (whence) {
case SEEK_CUR:
@@ -542,7 +542,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
}
}
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return retval;
}
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index fe02ae7f0..3b3172357 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -215,7 +215,7 @@ static int ceph_get_name(struct dentry *parent, char *name,
if (IS_ERR(req))
return PTR_ERR(req);
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
req->r_inode = d_inode(child);
ihold(d_inode(child));
@@ -224,7 +224,7 @@ static int ceph_get_name(struct dentry *parent, char *name,
req->r_num_caps = 2;
err = ceph_mdsc_do_request(mdsc, NULL, req);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
if (!err) {
struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 3c68e6aee..eb9028e8c 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -397,8 +397,9 @@ int ceph_release(struct inode *inode, struct file *file)
}
enum {
- CHECK_EOF = 1,
- READ_INLINE = 2,
+ HAVE_RETRIED = 1,
+ CHECK_EOF = 2,
+ READ_INLINE = 3,
};
/*
@@ -411,17 +412,15 @@ enum {
static int striped_read(struct inode *inode,
u64 off, u64 len,
struct page **pages, int num_pages,
- int *checkeof, bool o_direct,
- unsigned long buf_align)
+ int *checkeof)
{
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
u64 pos, this_len, left;
- int io_align, page_align;
- int pages_left;
- int read;
+ loff_t i_size;
+ int page_align, pages_left;
+ int read, ret;
struct page **page_pos;
- int ret;
bool hit_stripe, was_short;
/*
@@ -432,13 +431,9 @@ static int striped_read(struct inode *inode,
page_pos = pages;
pages_left = num_pages;
read = 0;
- io_align = off & ~PAGE_MASK;
more:
- if (o_direct)
- page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
- else
- page_align = pos & ~PAGE_MASK;
+ page_align = pos & ~PAGE_MASK;
this_len = left;
ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
&ci->i_layout, pos, &this_len,
@@ -452,13 +447,12 @@ more:
dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read,
ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
+ i_size = i_size_read(inode);
if (ret >= 0) {
int didpages;
- if (was_short && (pos + ret < inode->i_size)) {
- int zlen = min(this_len - ret,
- inode->i_size - pos - ret);
- int zoff = (o_direct ? buf_align : io_align) +
- read + ret;
+ if (was_short && (pos + ret < i_size)) {
+ int zlen = min(this_len - ret, i_size - pos - ret);
+ int zoff = (off & ~PAGE_MASK) + read + ret;
dout(" zero gap %llu to %llu\n",
pos + ret, pos + ret + zlen);
ceph_zero_page_vector_range(zoff, zlen, pages);
@@ -473,14 +467,14 @@ more:
pages_left -= didpages;
/* hit stripe and need continue*/
- if (left && hit_stripe && pos < inode->i_size)
+ if (left && hit_stripe && pos < i_size)
goto more;
}
if (read > 0) {
ret = read;
/* did we bounce off eof? */
- if (pos + left > inode->i_size)
+ if (pos + left > i_size)
*checkeof = CHECK_EOF;
}
@@ -521,54 +515,28 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
if (ret < 0)
return ret;
- if (iocb->ki_flags & IOCB_DIRECT) {
- while (iov_iter_count(i)) {
- size_t start;
- ssize_t n;
-
- n = dio_get_pagev_size(i);
- pages = dio_get_pages_alloc(i, n, &start, &num_pages);
- if (IS_ERR(pages))
- return PTR_ERR(pages);
-
- ret = striped_read(inode, off, n,
- pages, num_pages, checkeof,
- 1, start);
-
- ceph_put_page_vector(pages, num_pages, true);
-
- if (ret <= 0)
- break;
- off += ret;
- iov_iter_advance(i, ret);
- if (ret < n)
+ num_pages = calc_pages_for(off, len);
+ pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+ ret = striped_read(inode, off, len, pages,
+ num_pages, checkeof);
+ if (ret > 0) {
+ int l, k = 0;
+ size_t left = ret;
+
+ while (left) {
+ size_t page_off = off & ~PAGE_MASK;
+ size_t copy = min_t(size_t, left,
+ PAGE_SIZE - page_off);
+ l = copy_page_to_iter(pages[k++], page_off, copy, i);
+ off += l;
+ left -= l;
+ if (l < copy)
break;
}
- } else {
- num_pages = calc_pages_for(off, len);
- pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
- if (IS_ERR(pages))
- return PTR_ERR(pages);
- ret = striped_read(inode, off, len, pages,
- num_pages, checkeof, 0, 0);
- if (ret > 0) {
- int l, k = 0;
- size_t left = ret;
-
- while (left) {
- size_t page_off = off & ~PAGE_MASK;
- size_t copy = min_t(size_t,
- PAGE_SIZE - page_off, left);
- l = copy_page_to_iter(pages[k++], page_off,
- copy, i);
- off += l;
- left -= l;
- if (l < copy)
- break;
- }
- }
- ceph_release_page_vector(pages, num_pages);
}
+ ceph_release_page_vector(pages, num_pages);
if (off > iocb->ki_pos) {
ret = off - iocb->ki_pos;
@@ -579,6 +547,193 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
return ret;
}
+struct ceph_aio_request {
+ struct kiocb *iocb;
+ size_t total_len;
+ int write;
+ int error;
+ struct list_head osd_reqs;
+ unsigned num_reqs;
+ atomic_t pending_reqs;
+ struct timespec mtime;
+ struct ceph_cap_flush *prealloc_cf;
+};
+
+struct ceph_aio_work {
+ struct work_struct work;
+ struct ceph_osd_request *req;
+};
+
+static void ceph_aio_retry_work(struct work_struct *work);
+
+static void ceph_aio_complete(struct inode *inode,
+ struct ceph_aio_request *aio_req)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int ret;
+
+ if (!atomic_dec_and_test(&aio_req->pending_reqs))
+ return;
+
+ ret = aio_req->error;
+ if (!ret)
+ ret = aio_req->total_len;
+
+ dout("ceph_aio_complete %p rc %d\n", inode, ret);
+
+ if (ret >= 0 && aio_req->write) {
+ int dirty;
+
+ loff_t endoff = aio_req->iocb->ki_pos + aio_req->total_len;
+ if (endoff > i_size_read(inode)) {
+ if (ceph_inode_set_size(inode, endoff))
+ ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
+ }
+
+ spin_lock(&ci->i_ceph_lock);
+ ci->i_inline_version = CEPH_INLINE_NONE;
+ dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
+ &aio_req->prealloc_cf);
+ spin_unlock(&ci->i_ceph_lock);
+ if (dirty)
+ __mark_inode_dirty(inode, dirty);
+
+ }
+
+ ceph_put_cap_refs(ci, (aio_req->write ? CEPH_CAP_FILE_WR :
+ CEPH_CAP_FILE_RD));
+
+ aio_req->iocb->ki_complete(aio_req->iocb, ret, 0);
+
+ ceph_free_cap_flush(aio_req->prealloc_cf);
+ kfree(aio_req);
+}
+
+static void ceph_aio_complete_req(struct ceph_osd_request *req,
+ struct ceph_msg *msg)
+{
+ int rc = req->r_result;
+ struct inode *inode = req->r_inode;
+ struct ceph_aio_request *aio_req = req->r_priv;
+ struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
+ int num_pages = calc_pages_for((u64)osd_data->alignment,
+ osd_data->length);
+
+ dout("ceph_aio_complete_req %p rc %d bytes %llu\n",
+ inode, rc, osd_data->length);
+
+ if (rc == -EOLDSNAPC) {
+ struct ceph_aio_work *aio_work;
+ BUG_ON(!aio_req->write);
+
+ aio_work = kmalloc(sizeof(*aio_work), GFP_NOFS);
+ if (aio_work) {
+ INIT_WORK(&aio_work->work, ceph_aio_retry_work);
+ aio_work->req = req;
+ queue_work(ceph_inode_to_client(inode)->wb_wq,
+ &aio_work->work);
+ return;
+ }
+ rc = -ENOMEM;
+ } else if (!aio_req->write) {
+ if (rc == -ENOENT)
+ rc = 0;
+ if (rc >= 0 && osd_data->length > rc) {
+ int zoff = osd_data->alignment + rc;
+ int zlen = osd_data->length - rc;
+ /*
+ * If the read is satisfied by a single OSD request,
+ * it can extend past EOF. Otherwise the read is
+ * within i_size.
+ */
+ if (aio_req->num_reqs == 1) {
+ loff_t i_size = i_size_read(inode);
+ loff_t endoff = aio_req->iocb->ki_pos + rc;
+ if (endoff < i_size)
+ zlen = min_t(size_t, zlen,
+ i_size - endoff);
+ aio_req->total_len = rc + zlen;
+ }
+
+ if (zlen > 0)
+ ceph_zero_page_vector_range(zoff, zlen,
+ osd_data->pages);
+ }
+ }
+
+ ceph_put_page_vector(osd_data->pages, num_pages, false);
+ ceph_osdc_put_request(req);
+
+ if (rc < 0)
+ cmpxchg(&aio_req->error, 0, rc);
+
+ ceph_aio_complete(inode, aio_req);
+ return;
+}
+
+static void ceph_aio_retry_work(struct work_struct *work)
+{
+ struct ceph_aio_work *aio_work =
+ container_of(work, struct ceph_aio_work, work);
+ struct ceph_osd_request *orig_req = aio_work->req;
+ struct ceph_aio_request *aio_req = orig_req->r_priv;
+ struct inode *inode = orig_req->r_inode;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_snap_context *snapc;
+ struct ceph_osd_request *req;
+ int ret;
+
+ spin_lock(&ci->i_ceph_lock);
+ if (__ceph_have_pending_cap_snap(ci)) {
+ struct ceph_cap_snap *capsnap =
+ list_last_entry(&ci->i_cap_snaps,
+ struct ceph_cap_snap,
+ ci_item);
+ snapc = ceph_get_snap_context(capsnap->context);
+ } else {
+ BUG_ON(!ci->i_head_snapc);
+ snapc = ceph_get_snap_context(ci->i_head_snapc);
+ }
+ spin_unlock(&ci->i_ceph_lock);
+
+ req = ceph_osdc_alloc_request(orig_req->r_osdc, snapc, 2,
+ false, GFP_NOFS);
+ if (!req) {
+ ret = -ENOMEM;
+ req = orig_req;
+ goto out;
+ }
+
+ req->r_flags = CEPH_OSD_FLAG_ORDERSNAP |
+ CEPH_OSD_FLAG_ONDISK |
+ CEPH_OSD_FLAG_WRITE;
+ req->r_base_oloc = orig_req->r_base_oloc;
+ req->r_base_oid = orig_req->r_base_oid;
+
+ req->r_ops[0] = orig_req->r_ops[0];
+ osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
+
+ ceph_osdc_build_request(req, req->r_ops[0].extent.offset,
+ snapc, CEPH_NOSNAP, &aio_req->mtime);
+
+ ceph_osdc_put_request(orig_req);
+
+ req->r_callback = ceph_aio_complete_req;
+ req->r_inode = inode;
+ req->r_priv = aio_req;
+
+ ret = ceph_osdc_start_request(req->r_osdc, req, false);
+out:
+ if (ret < 0) {
+ BUG_ON(ret == -EOLDSNAPC);
+ req->r_result = ret;
+ ceph_aio_complete_req(req, NULL);
+ }
+
+ ceph_put_snap_context(snapc);
+ kfree(aio_work);
+}
+
/*
* Write commit request unsafe callback, called to tell us when a
* request is unsafe (that is, in flight--has been handed to the
@@ -612,16 +767,10 @@ static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
}
-/*
- * Synchronous write, straight from __user pointer or user pages.
- *
- * If write spans object boundary, just do multiple writes. (For a
- * correct atomic write, we should e.g. take write locks on all
- * objects, rollback on failure, etc.)
- */
static ssize_t
-ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
- struct ceph_snap_context *snapc)
+ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
+ struct ceph_snap_context *snapc,
+ struct ceph_cap_flush **pcf)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
@@ -630,44 +779,52 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
struct ceph_vino vino;
struct ceph_osd_request *req;
struct page **pages;
- int num_pages;
- int written = 0;
+ struct ceph_aio_request *aio_req = NULL;
+ int num_pages = 0;
int flags;
- int check_caps = 0;
int ret;
struct timespec mtime = CURRENT_TIME;
- size_t count = iov_iter_count(from);
+ size_t count = iov_iter_count(iter);
+ loff_t pos = iocb->ki_pos;
+ bool write = iov_iter_rw(iter) == WRITE;
- if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
+ if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
return -EROFS;
- dout("sync_direct_write on file %p %lld~%u\n", file, pos,
- (unsigned)count);
+ dout("sync_direct_read_write (%s) on file %p %lld~%u\n",
+ (write ? "write" : "read"), file, pos, (unsigned)count);
ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + count);
if (ret < 0)
return ret;
- ret = invalidate_inode_pages2_range(inode->i_mapping,
- pos >> PAGE_CACHE_SHIFT,
- (pos + count) >> PAGE_CACHE_SHIFT);
- if (ret < 0)
- dout("invalidate_inode_pages2_range returned %d\n", ret);
+ if (write) {
+ ret = invalidate_inode_pages2_range(inode->i_mapping,
+ pos >> PAGE_CACHE_SHIFT,
+ (pos + count) >> PAGE_CACHE_SHIFT);
+ if (ret < 0)
+ dout("invalidate_inode_pages2_range returned %d\n", ret);
- flags = CEPH_OSD_FLAG_ORDERSNAP |
- CEPH_OSD_FLAG_ONDISK |
- CEPH_OSD_FLAG_WRITE;
+ flags = CEPH_OSD_FLAG_ORDERSNAP |
+ CEPH_OSD_FLAG_ONDISK |
+ CEPH_OSD_FLAG_WRITE;
+ } else {
+ flags = CEPH_OSD_FLAG_READ;
+ }
- while (iov_iter_count(from) > 0) {
- u64 len = dio_get_pagev_size(from);
- size_t start;
- ssize_t n;
+ while (iov_iter_count(iter) > 0) {
+ u64 size = dio_get_pagev_size(iter);
+ size_t start = 0;
+ ssize_t len;
vino = ceph_vino(inode);
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
- vino, pos, &len, 0,
- 2,/*include a 'startsync' command*/
- CEPH_OSD_OP_WRITE, flags, snapc,
+ vino, pos, &size, 0,
+ /*include a 'startsync' command*/
+ write ? 2 : 1,
+ write ? CEPH_OSD_OP_WRITE :
+ CEPH_OSD_OP_READ,
+ flags, snapc,
ci->i_truncate_seq,
ci->i_truncate_size,
false);
@@ -676,10 +833,8 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
break;
}
- osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
-
- n = len;
- pages = dio_get_pages_alloc(from, len, &start, &num_pages);
+ len = size;
+ pages = dio_get_pages_alloc(iter, len, &start, &num_pages);
if (IS_ERR(pages)) {
ceph_osdc_put_request(req);
ret = PTR_ERR(pages);
@@ -687,47 +842,128 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
}
/*
- * throw out any page cache pages in this range. this
- * may block.
+ * To simplify error handling, allow AIO only when the IO is within
+ * i_size or can be satisfied by a single OSD request.
*/
- truncate_inode_pages_range(inode->i_mapping, pos,
- (pos+n) | (PAGE_CACHE_SIZE-1));
- osd_req_op_extent_osd_data_pages(req, 0, pages, n, start,
- false, false);
+ if (pos == iocb->ki_pos && !is_sync_kiocb(iocb) &&
+ (len == count || pos + count <= i_size_read(inode))) {
+ aio_req = kzalloc(sizeof(*aio_req), GFP_KERNEL);
+ if (aio_req) {
+ aio_req->iocb = iocb;
+ aio_req->write = write;
+ INIT_LIST_HEAD(&aio_req->osd_reqs);
+ if (write) {
+ aio_req->mtime = mtime;
+ swap(aio_req->prealloc_cf, *pcf);
+ }
+ }
+ /* ignore error */
+ }
+
+ if (write) {
+ /*
+ * throw out any page cache pages in this range. this
+ * may block.
+ */
+ truncate_inode_pages_range(inode->i_mapping, pos,
+ (pos+len) | (PAGE_CACHE_SIZE - 1));
+
+ osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
+ }
+
+
+ osd_req_op_extent_osd_data_pages(req, 0, pages, len, start,
+ false, false);
- /* BUG_ON(vino.snap != CEPH_NOSNAP); */
ceph_osdc_build_request(req, pos, snapc, vino.snap, &mtime);
- ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+ if (aio_req) {
+ aio_req->total_len += len;
+ aio_req->num_reqs++;
+ atomic_inc(&aio_req->pending_reqs);
+
+ req->r_callback = ceph_aio_complete_req;
+ req->r_inode = inode;
+ req->r_priv = aio_req;
+ list_add_tail(&req->r_unsafe_item, &aio_req->osd_reqs);
+
+ pos += len;
+ iov_iter_advance(iter, len);
+ continue;
+ }
+
+ ret = ceph_osdc_start_request(req->r_osdc, req, false);
if (!ret)
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+ size = i_size_read(inode);
+ if (!write) {
+ if (ret == -ENOENT)
+ ret = 0;
+ if (ret >= 0 && ret < len && pos + ret < size) {
+ int zlen = min_t(size_t, len - ret,
+ size - pos - ret);
+ ceph_zero_page_vector_range(start + ret, zlen,
+ pages);
+ ret += zlen;
+ }
+ if (ret >= 0)
+ len = ret;
+ }
+
ceph_put_page_vector(pages, num_pages, false);
ceph_osdc_put_request(req);
- if (ret)
+ if (ret < 0)
+ break;
+
+ pos += len;
+ iov_iter_advance(iter, len);
+
+ if (!write && pos >= size)
break;
- pos += n;
- written += n;
- iov_iter_advance(from, n);
- if (pos > i_size_read(inode)) {
- check_caps = ceph_inode_set_size(inode, pos);
- if (check_caps)
+ if (write && pos > size) {
+ if (ceph_inode_set_size(inode, pos))
ceph_check_caps(ceph_inode(inode),
CHECK_CAPS_AUTHONLY,
NULL);
}
}
- if (ret != -EOLDSNAPC && written > 0) {
+ if (aio_req) {
+ if (aio_req->num_reqs == 0) {
+ kfree(aio_req);
+ return ret;
+ }
+
+ ceph_get_cap_refs(ci, write ? CEPH_CAP_FILE_WR :
+ CEPH_CAP_FILE_RD);
+
+ while (!list_empty(&aio_req->osd_reqs)) {
+ req = list_first_entry(&aio_req->osd_reqs,
+ struct ceph_osd_request,
+ r_unsafe_item);
+ list_del_init(&req->r_unsafe_item);
+ if (ret >= 0)
+ ret = ceph_osdc_start_request(req->r_osdc,
+ req, false);
+ if (ret < 0) {
+ BUG_ON(ret == -EOLDSNAPC);
+ req->r_result = ret;
+ ceph_aio_complete_req(req, NULL);
+ }
+ }
+ return -EIOCBQUEUED;
+ }
+
+ if (ret != -EOLDSNAPC && pos > iocb->ki_pos) {
+ ret = pos - iocb->ki_pos;
iocb->ki_pos = pos;
- ret = written;
}
return ret;
}
-
/*
* Synchronous write, straight from __user pointer or user pages.
*
@@ -897,8 +1133,14 @@ again:
ceph_cap_string(got));
if (ci->i_inline_version == CEPH_INLINE_NONE) {
- /* hmm, this isn't really async... */
- ret = ceph_sync_read(iocb, to, &retry_op);
+ if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) {
+ ret = ceph_direct_read_write(iocb, to,
+ NULL, NULL);
+ if (ret >= 0 && ret < len)
+ retry_op = CHECK_EOF;
+ } else {
+ ret = ceph_sync_read(iocb, to, &retry_op);
+ }
} else {
retry_op = READ_INLINE;
}
@@ -916,7 +1158,7 @@ again:
pinned_page = NULL;
}
ceph_put_cap_refs(ci, got);
- if (retry_op && ret >= 0) {
+ if (retry_op > HAVE_RETRIED && ret >= 0) {
int statret;
struct page *page = NULL;
loff_t i_size;
@@ -968,12 +1210,11 @@ again:
if (retry_op == CHECK_EOF && iocb->ki_pos < i_size &&
ret < len) {
dout("sync_read hit hole, ppos %lld < size %lld"
- ", reading more\n", iocb->ki_pos,
- inode->i_size);
+ ", reading more\n", iocb->ki_pos, i_size);
read += ret;
len -= ret;
- retry_op = 0;
+ retry_op = HAVE_RETRIED;
goto again;
}
}
@@ -1014,7 +1255,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (!prealloc_cf)
return -ENOMEM;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* We can write back this queue in page reclaim */
current->backing_dev_info = inode_to_bdi(inode);
@@ -1052,7 +1293,7 @@ retry_snap:
}
dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
- inode, ceph_vinop(inode), pos, count, inode->i_size);
+ inode, ceph_vinop(inode), pos, count, i_size_read(inode));
if (fi->fmode & CEPH_FILE_MODE_LAZY)
want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
else
@@ -1070,7 +1311,7 @@ retry_snap:
(iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
struct ceph_snap_context *snapc;
struct iov_iter data;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
spin_lock(&ci->i_ceph_lock);
if (__ceph_have_pending_cap_snap(ci)) {
@@ -1088,8 +1329,8 @@ retry_snap:
/* we might need to revert back to that point */
data = *from;
if (iocb->ki_flags & IOCB_DIRECT)
- written = ceph_sync_direct_write(iocb, &data, pos,
- snapc);
+ written = ceph_direct_read_write(iocb, &data, snapc,
+ &prealloc_cf);
else
written = ceph_sync_write(iocb, &data, pos, snapc);
if (written == -EOLDSNAPC) {
@@ -1097,14 +1338,14 @@ retry_snap:
"got EOLDSNAPC, retrying\n",
inode, ceph_vinop(inode),
pos, (unsigned)count);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
goto retry_snap;
}
if (written > 0)
iov_iter_advance(from, written);
ceph_put_snap_context(snapc);
} else {
- loff_t old_size = inode->i_size;
+ loff_t old_size = i_size_read(inode);
/*
* No need to acquire the i_truncate_mutex. Because
* the MDS revokes Fwb caps before sending truncate
@@ -1115,9 +1356,9 @@ retry_snap:
written = generic_perform_write(file, from, pos);
if (likely(written >= 0))
iocb->ki_pos = pos + written;
- if (inode->i_size > old_size)
+ if (i_size_read(inode) > old_size)
ceph_fscache_update_objectsize(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
if (written >= 0) {
@@ -1147,7 +1388,7 @@ retry_snap:
goto out_unlocked;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
out_unlocked:
ceph_free_cap_flush(prealloc_cf);
current->backing_dev_info = NULL;
@@ -1160,9 +1401,10 @@ out_unlocked:
static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_mapping->host;
+ loff_t i_size;
int ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) {
ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
@@ -1172,9 +1414,10 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
}
}
+ i_size = i_size_read(inode);
switch (whence) {
case SEEK_END:
- offset += inode->i_size;
+ offset += i_size;
break;
case SEEK_CUR:
/*
@@ -1190,24 +1433,24 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
offset += file->f_pos;
break;
case SEEK_DATA:
- if (offset >= inode->i_size) {
+ if (offset >= i_size) {
ret = -ENXIO;
goto out;
}
break;
case SEEK_HOLE:
- if (offset >= inode->i_size) {
+ if (offset >= i_size) {
ret = -ENXIO;
goto out;
}
- offset = inode->i_size;
+ offset = i_size;
break;
}
offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return offset;
}
@@ -1363,7 +1606,7 @@ static long ceph_fallocate(struct file *file, int mode,
if (!prealloc_cf)
return -ENOMEM;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (ceph_snap(inode) != CEPH_NOSNAP) {
ret = -EROFS;
@@ -1418,7 +1661,7 @@ static long ceph_fallocate(struct file *file, int mode,
ceph_put_cap_refs(ci, got);
unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
ceph_free_cap_flush(prealloc_cf);
return ret;
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 498dcfa2d..5849b88bb 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -396,6 +396,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
ci->i_symlink = NULL;
memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
+ ci->i_pool_ns_len = 0;
ci->i_fragtree = RB_ROOT;
mutex_init(&ci->i_fragtree_mutex);
@@ -548,7 +549,7 @@ int ceph_fill_file_size(struct inode *inode, int issued,
if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) > 0 ||
(truncate_seq == ci->i_truncate_seq && size > inode->i_size)) {
dout("size %lld -> %llu\n", inode->i_size, size);
- inode->i_size = size;
+ i_size_write(inode, size);
inode->i_blocks = (size + (1<<9) - 1) >> 9;
ci->i_reported_size = size;
if (truncate_seq != ci->i_truncate_seq) {
@@ -756,6 +757,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
if (ci->i_layout.fl_pg_pool != info->layout.fl_pg_pool)
ci->i_ceph_flags &= ~CEPH_I_POOL_PERM;
ci->i_layout = info->layout;
+ ci->i_pool_ns_len = iinfo->pool_ns_len;
queue_trunc = ceph_fill_file_size(inode, issued,
le32_to_cpu(info->truncate_seq),
@@ -808,7 +810,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
spin_unlock(&ci->i_ceph_lock);
err = -EINVAL;
- if (WARN_ON(symlen != inode->i_size))
+ if (WARN_ON(symlen != i_size_read(inode)))
goto out;
err = -ENOMEM;
@@ -1549,7 +1551,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
spin_lock(&ci->i_ceph_lock);
dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
- inode->i_size = size;
+ i_size_write(inode, size);
inode->i_blocks = (size + (1 << 9) - 1) >> 9;
/* tell the MDS if we are approaching max_size */
@@ -1756,7 +1758,7 @@ retry:
*/
static const struct inode_operations ceph_symlink_iops = {
.readlink = generic_readlink,
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
.setattr = ceph_setattr,
.getattr = ceph_getattr,
.setxattr = ceph_setxattr,
@@ -1911,7 +1913,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
inode->i_size, attr->ia_size);
if ((issued & CEPH_CAP_FILE_EXCL) &&
attr->ia_size > inode->i_size) {
- inode->i_size = attr->ia_size;
+ i_size_write(inode, attr->ia_size);
inode->i_blocks =
(attr->ia_size + (1 << 9) - 1) >> 9;
inode->i_ctime = attr->ia_ctime;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index e7b130a63..911d64d86 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -100,6 +100,14 @@ static int parse_reply_info_in(void **p, void *end,
} else
info->inline_version = CEPH_INLINE_NONE;
+ if (features & CEPH_FEATURE_FS_FILE_LAYOUT_V2) {
+ ceph_decode_32_safe(p, end, info->pool_ns_len, bad);
+ ceph_decode_need(p, end, info->pool_ns_len, bad);
+ *p += info->pool_ns_len;
+ } else {
+ info->pool_ns_len = 0;
+ }
+
return 0;
bad:
return err;
@@ -2298,6 +2306,14 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
CEPH_CAP_PIN);
+ /* deny access to directories with pool_ns layouts */
+ if (req->r_inode && S_ISDIR(req->r_inode->i_mode) &&
+ ceph_inode(req->r_inode)->i_pool_ns_len)
+ return -EIO;
+ if (req->r_locked_dir &&
+ ceph_inode(req->r_locked_dir)->i_pool_ns_len)
+ return -EIO;
+
/* issue */
mutex_lock(&mdsc->mutex);
__register_request(mdsc, req, dir);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index ccf11ef0c..37712ccff 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -44,6 +44,7 @@ struct ceph_mds_reply_info_in {
u64 inline_version;
u32 inline_len;
char *inline_data;
+ u32 pool_ns_len;
};
/*
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index f446afada..ca4d5e845 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -639,8 +639,8 @@ static int __init init_caches(void)
ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
sizeof(struct ceph_inode_info),
__alignof__(struct ceph_inode_info),
- (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
- ceph_inode_init_once);
+ SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
+ SLAB_ACCOUNT, ceph_inode_init_once);
if (ceph_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 75b7d125c..9c458eb52 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -287,6 +287,7 @@ struct ceph_inode_info {
struct ceph_dir_layout i_dir_layout;
struct ceph_file_layout i_layout;
+ size_t i_pool_ns_len;
char *i_symlink;
/* for dirs */
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 7dc886c9a..e956cba94 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -175,7 +175,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
* string to the length of the original string to allow for worst case.
*/
md_len = strlen(sb_mountdata) + INET6_ADDRSTRLEN;
- mountdata = kzalloc(md_len + 1, GFP_KERNEL);
+ mountdata = kzalloc(md_len + sizeof("ip=") + 1, GFP_KERNEL);
if (mountdata == NULL) {
rc = -ENOMEM;
goto compose_mount_options_err;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index cbc0f4bca..2eea40353 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -507,6 +507,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_printf(s, ",rsize=%u", cifs_sb->rsize);
seq_printf(s, ",wsize=%u", cifs_sb->wsize);
+ seq_printf(s, ",echo_interval=%lu",
+ tcon->ses->server->echo_interval / HZ);
/* convert actimeo and display it in seconds */
seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ);
@@ -640,9 +642,9 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
while (*s && *s != sep)
s++;
- mutex_lock(&dir->i_mutex);
+ inode_lock(dir);
child = lookup_one_len(p, dentry, s - p);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
dput(dentry);
dentry = child;
} while (!IS_ERR(dentry));
@@ -752,6 +754,9 @@ cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter)
ssize_t rc;
struct inode *inode = file_inode(iocb->ki_filp);
+ if (iocb->ki_filp->f_flags & O_DIRECT)
+ return cifs_user_readv(iocb, iter);
+
rc = cifs_revalidate_mapping(inode);
if (rc)
return rc;
@@ -766,6 +771,18 @@ static ssize_t cifs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
ssize_t written;
int rc;
+ if (iocb->ki_filp->f_flags & O_DIRECT) {
+ written = cifs_user_writev(iocb, from);
+ if (written > 0 && CIFS_CACHE_READ(cinode)) {
+ cifs_zap_mapping(inode);
+ cifs_dbg(FYI,
+ "Set no oplock for inode=%p after a write operation\n",
+ inode);
+ cinode->oplock = 0;
+ }
+ return written;
+ }
+
written = cifs_get_writer(cinode);
if (written)
return written;
@@ -900,8 +917,7 @@ const struct inode_operations cifs_file_inode_ops = {
const struct inode_operations cifs_symlink_inode_ops = {
.readlink = generic_readlink,
- .follow_link = cifs_follow_link,
- .put_link = kfree_put_link,
+ .get_link = cifs_get_link,
.permission = cifs_permission,
/* BB add the following two eventually */
/* revalidate: cifs_revalidate,
@@ -914,6 +930,59 @@ const struct inode_operations cifs_symlink_inode_ops = {
#endif
};
+/*
+ * Clone @len bytes (0: from @off to source i_size) of @src_file at @off to
+ * @destoff in @dst_file via the server's duplicate_extents op, if any.
+ */
+static int cifs_clone_file_range(struct file *src_file, loff_t off,
+ struct file *dst_file, loff_t destoff, u64 len)
+{
+ struct inode *src_inode = file_inode(src_file);
+ struct inode *target_inode = file_inode(dst_file);
+ struct cifsFileInfo *smb_file_src = src_file->private_data;
+ struct cifsFileInfo *smb_file_target = dst_file->private_data;
+ struct cifs_tcon *target_tcon;
+ unsigned int xid;
+ int rc;
+
+ cifs_dbg(FYI, "clone range\n");
+ xid = get_xid();
+
+ if (!smb_file_src || !smb_file_target) {
+ rc = -EBADF;
+ cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
+ goto out;
+ }
+ /* only dereference the target's private data once it is known non-NULL */
+ target_tcon = tlink_tcon(smb_file_target->tlink);
+
+ /*
+ * Note: cifs case is easier than btrfs since server responsible for
+ * checks for proper open modes and file type and if it wants
+ * server could even support copy of range where source = target
+ */
+ lock_two_nondirectories(target_inode, src_inode);
+ if (len == 0)
+ len = src_inode->i_size - off;
+
+ cifs_dbg(FYI, "about to flush pages\n");
+ /* should we flush first and last page first */
+ truncate_inode_pages_range(&target_inode->i_data, destoff,
+ PAGE_CACHE_ALIGN(destoff + len)-1);
+ if (target_tcon->ses->server->ops->duplicate_extents)
+ rc = target_tcon->ses->server->ops->duplicate_extents(xid,
+ smb_file_src, smb_file_target, off, len, destoff);
+ else
+ rc = -EOPNOTSUPP; /* server ops provide no duplicate_extents */
+
+ /* force revalidate of target size and timestamps after server update */
+ CIFS_I(target_inode)->time = 0;
+ /* unlock in reverse order of locking: not required, but consistent */
+ unlock_two_nondirectories(src_inode, target_inode);
+out:
+ free_xid(xid);
+ return rc;
+}
+
const struct file_operations cifs_file_ops = {
.read_iter = cifs_loose_read_iter,
.write_iter = cifs_file_write_iter,
@@ -926,6 +995,7 @@ const struct file_operations cifs_file_ops = {
.splice_read = generic_file_splice_read,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
+ .clone_file_range = cifs_clone_file_range,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
};
@@ -942,6 +1012,7 @@ const struct file_operations cifs_file_strict_ops = {
.splice_read = generic_file_splice_read,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
+ .clone_file_range = cifs_clone_file_range,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
};
@@ -958,6 +1029,7 @@ const struct file_operations cifs_file_direct_ops = {
.mmap = cifs_file_mmap,
.splice_read = generic_file_splice_read,
.unlocked_ioctl = cifs_ioctl,
+ .clone_file_range = cifs_clone_file_range,
.llseek = cifs_llseek,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
@@ -974,6 +1046,7 @@ const struct file_operations cifs_file_nobrl_ops = {
.splice_read = generic_file_splice_read,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
+ .clone_file_range = cifs_clone_file_range,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
};
@@ -989,6 +1062,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
.splice_read = generic_file_splice_read,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
+ .clone_file_range = cifs_clone_file_range,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
};
@@ -1004,6 +1078,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
.mmap = cifs_file_mmap,
.splice_read = generic_file_splice_read,
.unlocked_ioctl = cifs_ioctl,
+ .clone_file_range = cifs_clone_file_range,
.llseek = cifs_llseek,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
@@ -1014,6 +1089,7 @@ const struct file_operations cifs_dir_ops = {
.release = cifs_closedir,
.read = generic_read_dir,
.unlocked_ioctl = cifs_ioctl,
+ .clone_file_range = cifs_clone_file_range,
.llseek = generic_file_llseek,
};
@@ -1032,7 +1108,7 @@ cifs_init_inodecache(void)
cifs_inode_cachep = kmem_cache_create("cifs_inode_cache",
sizeof(struct cifsInodeInfo),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
cifs_init_once);
if (cifs_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 44b3d4280..83aac8ba5 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -116,9 +116,8 @@ extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
#endif
/* Functions related to symlinks */
-extern const char *cifs_follow_link(struct dentry *direntry, void **cookie);
-extern int cifs_readlink(struct dentry *direntry, char __user *buffer,
- int buflen);
+extern const char *cifs_get_link(struct dentry *, struct inode *,
+ struct delayed_call *);
extern int cifs_symlink(struct inode *inode, struct dentry *direntry,
const char *symname);
extern int cifs_removexattr(struct dentry *, const char *);
@@ -127,7 +126,6 @@ extern int cifs_setxattr(struct dentry *, const char *, const void *,
extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
-
#ifdef CONFIG_CIFS_NFSD_EXPORT
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 2b510c537..a25b2513f 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -70,8 +70,10 @@
#define SERVER_NAME_LENGTH 40
#define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1)
-/* SMB echo "timeout" -- FIXME: tunable? */
-#define SMB_ECHO_INTERVAL (60 * HZ)
+/* echo interval in seconds */
+#define SMB_ECHO_INTERVAL_MIN 1
+#define SMB_ECHO_INTERVAL_MAX 600
+#define SMB_ECHO_INTERVAL_DEFAULT 60
#include "cifspdu.h"
@@ -225,7 +227,7 @@ struct smb_version_operations {
void (*print_stats)(struct seq_file *m, struct cifs_tcon *);
void (*dump_share_caps)(struct seq_file *, struct cifs_tcon *);
/* verify the message */
- int (*check_message)(char *, unsigned int);
+ int (*check_message)(char *, unsigned int, struct TCP_Server_Info *);
bool (*is_oplock_break)(char *, struct TCP_Server_Info *);
void (*downgrade_oplock)(struct TCP_Server_Info *,
struct cifsInodeInfo *, bool);
@@ -507,6 +509,7 @@ struct smb_vol {
struct sockaddr_storage dstaddr; /* destination address */
struct sockaddr_storage srcaddr; /* allow binding to a local IP */
struct nls_table *local_nls;
+ unsigned int echo_interval; /* echo interval in secs */
};
#define CIFS_MOUNT_MASK (CIFS_MOUNT_NO_PERM | CIFS_MOUNT_SET_UID | \
@@ -627,7 +630,9 @@ struct TCP_Server_Info {
#ifdef CONFIG_CIFS_SMB2
unsigned int max_read;
unsigned int max_write;
+ __u8 preauth_hash[512];
#endif /* CONFIG_CIFS_SMB2 */
+ unsigned long echo_interval;
};
static inline unsigned int
@@ -809,7 +814,10 @@ struct cifs_ses {
bool need_reconnect:1; /* connection reset, uid now invalid */
#ifdef CONFIG_CIFS_SMB2
__u16 session_flags;
- char smb3signingkey[SMB3_SIGN_KEY_SIZE]; /* for signing smb3 packets */
+ __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
+ __u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE];
+ __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE];
+ __u8 preauth_hash[512];
#endif /* CONFIG_CIFS_SMB2 */
};
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index c63fd1dde..eed7ff50f 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -102,7 +102,7 @@ extern int SendReceiveBlockingLock(const unsigned int xid,
struct smb_hdr *out_buf,
int *bytes_returned);
extern int cifs_reconnect(struct TCP_Server_Info *server);
-extern int checkSMB(char *buf, unsigned int length);
+extern int checkSMB(char *buf, unsigned int len, struct TCP_Server_Info *srvr);
extern bool is_valid_oplock_break(char *, struct TCP_Server_Info *);
extern bool backup_cred(struct cifs_sb_info *);
extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
@@ -439,7 +439,8 @@ extern int setup_ntlm_response(struct cifs_ses *, const struct nls_table *);
extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *);
extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
extern int calc_seckey(struct cifs_ses *);
-extern int generate_smb3signingkey(struct cifs_ses *);
+extern int generate_smb30signingkey(struct cifs_ses *);
+extern int generate_smb311signingkey(struct cifs_ses *);
#ifdef CONFIG_CIFS_WEAK_PW_HASH
extern int calc_lanman_hash(const char *password, const char *cryptkey,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 3c194ff0d..a763cd3d9 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -95,6 +95,7 @@ enum {
Opt_cruid, Opt_gid, Opt_file_mode,
Opt_dirmode, Opt_port,
Opt_rsize, Opt_wsize, Opt_actimeo,
+ Opt_echo_interval,
/* Mount options which take string value */
Opt_user, Opt_pass, Opt_ip,
@@ -188,6 +189,7 @@ static const match_table_t cifs_mount_option_tokens = {
{ Opt_rsize, "rsize=%s" },
{ Opt_wsize, "wsize=%s" },
{ Opt_actimeo, "actimeo=%s" },
+ { Opt_echo_interval, "echo_interval=%s" },
{ Opt_blank_user, "user=" },
{ Opt_blank_user, "username=" },
@@ -418,6 +420,7 @@ cifs_echo_request(struct work_struct *work)
int rc;
struct TCP_Server_Info *server = container_of(work,
struct TCP_Server_Info, echo.work);
+ unsigned long echo_interval = server->echo_interval;
/*
* We cannot send an echo if it is disabled or until the
@@ -427,7 +430,7 @@ cifs_echo_request(struct work_struct *work)
*/
if (!server->ops->need_neg || server->ops->need_neg(server) ||
(server->ops->can_echo && !server->ops->can_echo(server)) ||
- time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ))
+ time_before(jiffies, server->lstrp + echo_interval - HZ))
goto requeue_echo;
rc = server->ops->echo ? server->ops->echo(server) : -ENOSYS;
@@ -436,7 +439,7 @@ cifs_echo_request(struct work_struct *work)
server->hostname);
requeue_echo:
- queue_delayed_work(cifsiod_wq, &server->echo, SMB_ECHO_INTERVAL);
+ queue_delayed_work(cifsiod_wq, &server->echo, echo_interval);
}
static bool
@@ -487,9 +490,9 @@ server_unresponsive(struct TCP_Server_Info *server)
* a response in >60s.
*/
if (server->tcpStatus == CifsGood &&
- time_after(jiffies, server->lstrp + 2 * SMB_ECHO_INTERVAL)) {
- cifs_dbg(VFS, "Server %s has not responded in %d seconds. Reconnecting...\n",
- server->hostname, (2 * SMB_ECHO_INTERVAL) / HZ);
+ time_after(jiffies, server->lstrp + 2 * server->echo_interval)) {
+ cifs_dbg(VFS, "Server %s has not responded in %lu seconds. Reconnecting...\n",
+ server->hostname, (2 * server->echo_interval) / HZ);
cifs_reconnect(server);
wake_up(&server->response_q);
return true;
@@ -828,7 +831,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
* 48 bytes is enough to display the header and a little bit
* into the payload for debugging purposes.
*/
- length = server->ops->check_message(buf, server->total_read);
+ length = server->ops->check_message(buf, server->total_read, server);
if (length != 0)
cifs_dump_mem("Bad SMB: ", buf,
min_t(unsigned int, server->total_read, 48));
@@ -1624,6 +1627,14 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
goto cifs_parse_mount_err;
}
break;
+ case Opt_echo_interval:
+ if (get_option_ul(args, &option)) {
+ cifs_dbg(VFS, "%s: Invalid echo interval value\n",
+ __func__);
+ goto cifs_parse_mount_err;
+ }
+ vol->echo_interval = option;
+ break;
/* String Arguments */
@@ -2089,6 +2100,9 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol)
if (!match_security(server, vol))
return 0;
+ if (server->echo_interval != vol->echo_interval)
+ return 0;
+
return 1;
}
@@ -2208,6 +2222,12 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
tcp_ses->tcpStatus = CifsNew;
++tcp_ses->srv_count;
+ if (volume_info->echo_interval >= SMB_ECHO_INTERVAL_MIN &&
+ volume_info->echo_interval <= SMB_ECHO_INTERVAL_MAX)
+ tcp_ses->echo_interval = volume_info->echo_interval * HZ;
+ else
+ tcp_ses->echo_interval = SMB_ECHO_INTERVAL_DEFAULT * HZ;
+
rc = ip_connect(tcp_ses);
if (rc < 0) {
cifs_dbg(VFS, "Error connecting to socket. Aborting operation.\n");
@@ -2237,7 +2257,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
cifs_fscache_get_client_cookie(tcp_ses);
/* queue echo request delayed work */
- queue_delayed_work(cifsiod_wq, &tcp_ses->echo, SMB_ECHO_INTERVAL);
+ queue_delayed_work(cifsiod_wq, &tcp_ses->echo, tcp_ses->echo_interval);
return tcp_ses;
@@ -2979,8 +2999,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
if (ses_init_buf) {
ses_init_buf->trailer.session_req.called_len = 32;
- if (server->server_RFC1001_name &&
- server->server_RFC1001_name[0] != 0)
+ if (server->server_RFC1001_name[0] != 0)
rfc1002mangle(ses_init_buf->trailer.
session_req.called_name,
server->server_RFC1001_name,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0068e8221..ff882aeac 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2267,7 +2267,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (rc)
return rc;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
xid = get_xid();
@@ -2292,7 +2292,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
}
free_xid(xid);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return rc;
}
@@ -2309,7 +2309,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (rc)
return rc;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
xid = get_xid();
@@ -2326,7 +2326,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
}
free_xid(xid);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return rc;
}
@@ -2672,7 +2672,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
* with a brlock that prevents writing.
*/
down_read(&cinode->lock_sem);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
rc = generic_write_checks(iocb, from);
if (rc <= 0)
@@ -2685,7 +2685,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
else
rc = -EACCES;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (rc > 0) {
ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc);
@@ -3391,13 +3391,13 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
* should have access to this page, we're safe to simply set
* PG_locked without checking it first.
*/
- __set_page_locked(page);
+ __SetPageLocked(page);
rc = add_to_page_cache_locked(page, mapping,
page->index, gfp);
/* give up if we can't stick it in the cache */
if (rc) {
- __clear_page_locked(page);
+ __ClearPageLocked(page);
return rc;
}
@@ -3418,9 +3418,9 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
if (*bytes + PAGE_CACHE_SIZE > rsize)
break;
- __set_page_locked(page);
+ __SetPageLocked(page);
if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
- __clear_page_locked(page);
+ __ClearPageLocked(page);
break;
}
list_move_tail(&page->lru, tmplist);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a329f5ba3..aeb26dbfa 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -814,8 +814,21 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
}
} else
fattr.cf_uniqueid = iunique(sb, ROOT_I);
- } else
- fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
+ } else {
+ if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
+ validinum == false && server->ops->get_srv_inum) {
+ /*
+ * Pass a NULL tcon to ensure we don't make a round
+ * trip to the server. This only works for SMB2+.
+ */
+ tmprc = server->ops->get_srv_inum(xid,
+ NULL, cifs_sb, full_path,
+ &fattr.cf_uniqueid, data);
+ if (tmprc)
+ fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
+ } else
+ fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
+ }
/* query for SFU type info if supported and needed */
if (fattr.cf_cifsattrs & ATTR_SYSTEM &&
@@ -856,6 +869,13 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
} else {
/* we already have inode, update it */
+ /* if uniqueid is different, return error */
+ if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM &&
+ CIFS_I(*inode)->uniqueid != fattr.cf_uniqueid)) {
+ rc = -ESTALE;
+ goto cgii_exit;
+ }
+
/* if filetype is different, return error */
if (unlikely(((*inode)->i_mode & S_IFMT) !=
(fattr.cf_mode & S_IFMT))) {
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 35cf990f8..7a3b84e30 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -34,73 +34,36 @@
#include "cifs_ioctl.h"
#include <linux/btrfs.h>
-static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
- unsigned long srcfd, u64 off, u64 len, u64 destoff,
- bool dup_extents)
+static int cifs_file_clone_range(unsigned int xid, struct file *src_file,
+ struct file *dst_file)
{
- int rc;
- struct cifsFileInfo *smb_file_target = dst_file->private_data;
+ struct inode *src_inode = file_inode(src_file);
struct inode *target_inode = file_inode(dst_file);
- struct cifs_tcon *target_tcon;
- struct fd src_file;
struct cifsFileInfo *smb_file_src;
- struct inode *src_inode;
+ struct cifsFileInfo *smb_file_target;
struct cifs_tcon *src_tcon;
+ struct cifs_tcon *target_tcon;
+ int rc;
cifs_dbg(FYI, "ioctl clone range\n");
- /* the destination must be opened for writing */
- if (!(dst_file->f_mode & FMODE_WRITE)) {
- cifs_dbg(FYI, "file target not open for write\n");
- return -EINVAL;
- }
- /* check if target volume is readonly and take reference */
- rc = mnt_want_write_file(dst_file);
- if (rc) {
- cifs_dbg(FYI, "mnt_want_write failed with rc %d\n", rc);
- return rc;
- }
-
- src_file = fdget(srcfd);
- if (!src_file.file) {
- rc = -EBADF;
- goto out_drop_write;
- }
-
- if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) {
- rc = -EBADF;
- cifs_dbg(VFS, "src file seems to be from a different filesystem type\n");
- goto out_fput;
- }
-
- if ((!src_file.file->private_data) || (!dst_file->private_data)) {
+ if (!src_file->private_data || !dst_file->private_data) {
rc = -EBADF;
cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
- goto out_fput;
+ goto out;
}
rc = -EXDEV;
smb_file_target = dst_file->private_data;
- smb_file_src = src_file.file->private_data;
+ smb_file_src = src_file->private_data;
src_tcon = tlink_tcon(smb_file_src->tlink);
target_tcon = tlink_tcon(smb_file_target->tlink);
- /* check source and target on same server (or volume if dup_extents) */
- if (dup_extents && (src_tcon != target_tcon)) {
- cifs_dbg(VFS, "source and target of copy not on same share\n");
- goto out_fput;
- }
-
- if (!dup_extents && (src_tcon->ses != target_tcon->ses)) {
+ if (src_tcon->ses != target_tcon->ses) {
cifs_dbg(VFS, "source and target of copy not on same server\n");
- goto out_fput;
+ goto out;
}
- src_inode = file_inode(src_file.file);
- rc = -EINVAL;
- if (S_ISDIR(src_inode->i_mode))
- goto out_fput;
-
/*
* Note: cifs case is easier than btrfs since server responsible for
* checks for proper open modes and file type and if it wants
@@ -108,34 +71,66 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
*/
lock_two_nondirectories(target_inode, src_inode);
- /* determine range to clone */
- rc = -EINVAL;
- if (off + len > src_inode->i_size || off + len < off)
- goto out_unlock;
- if (len == 0)
- len = src_inode->i_size - off;
-
cifs_dbg(FYI, "about to flush pages\n");
/* should we flush first and last page first */
- truncate_inode_pages_range(&target_inode->i_data, destoff,
- PAGE_CACHE_ALIGN(destoff + len)-1);
+ truncate_inode_pages(&target_inode->i_data, 0);
- if (dup_extents && target_tcon->ses->server->ops->duplicate_extents)
- rc = target_tcon->ses->server->ops->duplicate_extents(xid,
- smb_file_src, smb_file_target, off, len, destoff);
- else if (!dup_extents && target_tcon->ses->server->ops->clone_range)
+ if (target_tcon->ses->server->ops->clone_range)
rc = target_tcon->ses->server->ops->clone_range(xid,
- smb_file_src, smb_file_target, off, len, destoff);
+ smb_file_src, smb_file_target, 0, src_inode->i_size, 0);
else
rc = -EOPNOTSUPP;
/* force revalidate of size and timestamps of target file now
that target is updated on the server */
CIFS_I(target_inode)->time = 0;
-out_unlock:
/* although unlocking in the reverse order from locking is not
strictly necessary here it is a little cleaner to be consistent */
unlock_two_nondirectories(src_inode, target_inode);
+out:
+ return rc;
+}
+
+static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
+ unsigned long srcfd)
+{
+ int rc;
+ struct fd src_file;
+ struct inode *src_inode;
+
+ cifs_dbg(FYI, "ioctl clone range\n");
+ /* the destination must be opened for writing */
+ if (!(dst_file->f_mode & FMODE_WRITE)) {
+ cifs_dbg(FYI, "file target not open for write\n");
+ return -EINVAL;
+ }
+
+ /* check if target volume is readonly and take reference */
+ rc = mnt_want_write_file(dst_file);
+ if (rc) {
+ cifs_dbg(FYI, "mnt_want_write failed with rc %d\n", rc);
+ return rc;
+ }
+
+ src_file = fdget(srcfd);
+ if (!src_file.file) {
+ rc = -EBADF;
+ goto out_drop_write;
+ }
+
+ if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) {
+ rc = -EBADF;
+ cifs_dbg(VFS, "src file seems to be from a different filesystem type\n");
+ goto out_fput;
+ }
+
+ src_inode = file_inode(src_file.file);
+ rc = -EINVAL;
+ if (S_ISDIR(src_inode->i_mode))
+ goto out_fput;
+
+ rc = cifs_file_clone_range(xid, src_file.file, dst_file);
+
out_fput:
fdput(src_file);
out_drop_write:
@@ -256,10 +251,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
}
break;
case CIFS_IOC_COPYCHUNK_FILE:
- rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, false);
- break;
- case BTRFS_IOC_CLONE:
- rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, true);
+ rc = cifs_ioctl_clone(xid, filep, arg);
break;
case CIFS_IOC_SET_INTEGRITY:
if (pSMBFile == NULL)
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index e3548f73b..062c23755 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -627,9 +627,9 @@ cifs_hl_exit:
}
const char *
-cifs_follow_link(struct dentry *direntry, void **cookie)
+cifs_get_link(struct dentry *direntry, struct inode *inode,
+ struct delayed_call *done)
{
- struct inode *inode = d_inode(direntry);
int rc = -ENOMEM;
unsigned int xid;
char *full_path = NULL;
@@ -639,6 +639,9 @@ cifs_follow_link(struct dentry *direntry, void **cookie)
struct cifs_tcon *tcon;
struct TCP_Server_Info *server;
+ if (!direntry)
+ return ERR_PTR(-ECHILD);
+
xid = get_xid();
tlink = cifs_sb_tlink(cifs_sb);
@@ -678,7 +681,8 @@ cifs_follow_link(struct dentry *direntry, void **cookie)
kfree(target_path);
return ERR_PTR(rc);
}
- return *cookie = target_path;
+ set_delayed_call(done, kfree_link, target_path);
+ return target_path;
}
int
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 8442b8b8e..813fe13c2 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -310,7 +310,7 @@ check_smb_hdr(struct smb_hdr *smb)
}
int
-checkSMB(char *buf, unsigned int total_read)
+checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server)
{
struct smb_hdr *smb = (struct smb_hdr *)buf;
__u32 rfclen = be32_to_cpu(smb->smb_buf_length);
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 1c5907019..389fb9f8c 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -38,7 +38,7 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
* Make sure that this really is an SMB, that it is a response,
* and that the message ids match.
*/
- if ((*(__le32 *)hdr->ProtocolId == SMB2_PROTO_NUMBER) &&
+ if ((hdr->ProtocolId == SMB2_PROTO_NUMBER) &&
(mid == wire_mid)) {
if (hdr->Flags & SMB2_FLAGS_SERVER_TO_REDIR)
return 0;
@@ -50,9 +50,9 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
cifs_dbg(VFS, "Received Request not response\n");
}
} else { /* bad signature or mid */
- if (*(__le32 *)hdr->ProtocolId != SMB2_PROTO_NUMBER)
+ if (hdr->ProtocolId != SMB2_PROTO_NUMBER)
cifs_dbg(VFS, "Bad protocol string signature header %x\n",
- *(unsigned int *) hdr->ProtocolId);
+ le32_to_cpu(hdr->ProtocolId));
if (mid != wire_mid)
cifs_dbg(VFS, "Mids do not match: %llu and %llu\n",
mid, wire_mid);
@@ -93,11 +93,11 @@ static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
};
int
-smb2_check_message(char *buf, unsigned int length)
+smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
{
struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
struct smb2_pdu *pdu = (struct smb2_pdu *)hdr;
- __u64 mid = le64_to_cpu(hdr->MessageId);
+ __u64 mid;
__u32 len = get_rfc1002_length(buf);
__u32 clc_len; /* calculated length */
int command;
@@ -111,6 +111,30 @@ smb2_check_message(char *buf, unsigned int length)
* ie Validate the wct via smb2_struct_sizes table above
*/
+ if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
+ struct smb2_transform_hdr *thdr =
+ (struct smb2_transform_hdr *)buf;
+ struct cifs_ses *ses = NULL;
+ struct list_head *tmp;
+
+ /* decrypt frame now that it is completely read in */
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each(tmp, &srvr->smb_ses_list) {
+ ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+ if (ses->Suid == thdr->SessionId)
+ break;
+
+ ses = NULL;
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
+ if (ses == NULL) {
+ cifs_dbg(VFS, "no decryption - session id not found\n");
+ return 1;
+ }
+ }
+
+
+ mid = le64_to_cpu(hdr->MessageId);
if (length < sizeof(struct smb2_pdu)) {
if ((length >= sizeof(struct smb2_hdr)) && (hdr->Status != 0)) {
pdu->StructureSize2 = 0;
@@ -322,7 +346,7 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
/* return pointer to beginning of data area, ie offset from SMB start */
if ((*off != 0) && (*len != 0))
- return (char *)(&hdr->ProtocolId[0]) + *off;
+ return (char *)(&hdr->ProtocolId) + *off;
else
return NULL;
}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 53ccdde6f..3525ed756 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -182,6 +182,11 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf)
struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
__u64 wire_mid = le64_to_cpu(hdr->MessageId);
+ if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
+ cifs_dbg(VFS, "encrypted frame parsing not supported yet");
+ return NULL;
+ }
+
spin_lock(&GlobalMid_Lock);
list_for_each_entry(mid, &server->pending_mid_q, qhead) {
if ((mid->mid == wire_mid) &&
@@ -1692,7 +1697,7 @@ struct smb_version_operations smb30_operations = {
.get_lease_key = smb2_get_lease_key,
.set_lease_key = smb2_set_lease_key,
.new_lease_key = smb2_new_lease_key,
- .generate_signingkey = generate_smb3signingkey,
+ .generate_signingkey = generate_smb30signingkey,
.calc_signature = smb3_calc_signature,
.set_integrity = smb3_set_integrity,
.is_read_op = smb21_is_read_op,
@@ -1779,7 +1784,7 @@ struct smb_version_operations smb311_operations = {
.get_lease_key = smb2_get_lease_key,
.set_lease_key = smb2_set_lease_key,
.new_lease_key = smb2_new_lease_key,
- .generate_signingkey = generate_smb3signingkey,
+ .generate_signingkey = generate_smb311signingkey,
.calc_signature = smb3_calc_signature,
.set_integrity = smb3_set_integrity,
.is_read_op = smb21_is_read_op,
@@ -1838,7 +1843,7 @@ struct smb_version_values smb21_values = {
struct smb_version_values smb30_values = {
.version_string = SMB30_VERSION_STRING,
.protocol_id = SMB30_PROT_ID,
- .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES,
+ .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
.large_lock_type = 0,
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
@@ -1858,7 +1863,7 @@ struct smb_version_values smb30_values = {
struct smb_version_values smb302_values = {
.version_string = SMB302_VERSION_STRING,
.protocol_id = SMB302_PROT_ID,
- .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES,
+ .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
.large_lock_type = 0,
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 373b5cd1c..42e1f440e 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -97,10 +97,7 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
hdr->smb2_buf_length = cpu_to_be32(parmsize + sizeof(struct smb2_hdr)
- 4 /* RFC 1001 length field itself not counted */);
- hdr->ProtocolId[0] = 0xFE;
- hdr->ProtocolId[1] = 'S';
- hdr->ProtocolId[2] = 'M';
- hdr->ProtocolId[3] = 'B';
+ hdr->ProtocolId = SMB2_PROTO_NUMBER;
hdr->StructureSize = cpu_to_le16(64);
hdr->Command = smb2_cmd;
hdr->CreditRequest = cpu_to_le16(2); /* BB make this dynamic */
@@ -1577,7 +1574,8 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
goto ioctl_exit;
}
- memcpy(*out_data, rsp->hdr.ProtocolId + le32_to_cpu(rsp->OutputOffset),
+ memcpy(*out_data,
+ (char *)&rsp->hdr.ProtocolId + le32_to_cpu(rsp->OutputOffset),
*plen);
ioctl_exit:
free_rsp_buf(resp_buftype, rsp);
@@ -2097,7 +2095,7 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
}
if (*buf) {
- memcpy(*buf, (char *)rsp->hdr.ProtocolId + rsp->DataOffset,
+ memcpy(*buf, (char *)&rsp->hdr.ProtocolId + rsp->DataOffset,
*nbytes);
free_rsp_buf(resp_buftype, iov[0].iov_base);
} else if (resp_buftype != CIFS_NO_BUFFER) {
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 4af52780e..ff88d9feb 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -86,6 +86,7 @@
#define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */
#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe)
+#define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd)
/*
* SMB2 Header Definition
@@ -102,7 +103,7 @@ struct smb2_hdr {
__be32 smb2_buf_length; /* big endian on wire */
/* length is only two or three bytes - with
one or two byte type preceding it that MBZ */
- __u8 ProtocolId[4]; /* 0xFE 'S' 'M' 'B' */
+ __le32 ProtocolId; /* 0xFE 'S' 'M' 'B' */
__le16 StructureSize; /* 64 */
__le16 CreditCharge; /* MBZ */
__le32 Status; /* Error from server */
@@ -128,11 +129,10 @@ struct smb2_transform_hdr {
one or two byte type preceding it that MBZ */
__u8 ProtocolId[4]; /* 0xFD 'S' 'M' 'B' */
__u8 Signature[16];
- __u8 Nonce[11];
- __u8 Reserved[5];
+ __u8 Nonce[16];
__le32 OriginalMessageSize;
__u16 Reserved1;
- __le16 EncryptionAlgorithm;
+ __le16 Flags; /* EncryptionAlgorithm */
__u64 SessionId;
} __packed;
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 79dc650c1..4f07dc936 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -34,7 +34,8 @@ struct smb_rqst;
*****************************************************************
*/
extern int map_smb2_to_linux_error(char *buf, bool log_err);
-extern int smb2_check_message(char *buf, unsigned int length);
+extern int smb2_check_message(char *buf, unsigned int length,
+ struct TCP_Server_Info *server);
extern unsigned int smb2_calc_size(void *buf);
extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr);
extern __le16 *cifs_convert_path_to_utf16(const char *from,
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index d4c5b6f10..8732a43b1 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -222,8 +222,8 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
return rc;
}
-int
-generate_smb3signingkey(struct cifs_ses *ses)
+static int generate_key(struct cifs_ses *ses, struct kvec label,
+ struct kvec context, __u8 *key, unsigned int key_size)
{
unsigned char zero = 0x0;
__u8 i[4] = {0, 0, 0, 1};
@@ -233,7 +233,7 @@ generate_smb3signingkey(struct cifs_ses *ses)
unsigned char *hashptr = prfhash;
memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE);
- memset(ses->smb3signingkey, 0x0, SMB3_SIGNKEY_SIZE);
+ memset(key, 0x0, key_size);
rc = smb3_crypto_shash_allocate(ses->server);
if (rc) {
@@ -262,7 +262,7 @@ generate_smb3signingkey(struct cifs_ses *ses)
}
rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
- "SMB2AESCMAC", 12);
+ label.iov_base, label.iov_len);
if (rc) {
cifs_dbg(VFS, "%s: Could not update with label\n", __func__);
goto smb3signkey_ret;
@@ -276,7 +276,7 @@ generate_smb3signingkey(struct cifs_ses *ses)
}
rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
- "SmbSign", 8);
+ context.iov_base, context.iov_len);
if (rc) {
cifs_dbg(VFS, "%s: Could not update with context\n", __func__);
goto smb3signkey_ret;
@@ -296,12 +296,102 @@ generate_smb3signingkey(struct cifs_ses *ses)
goto smb3signkey_ret;
}
- memcpy(ses->smb3signingkey, hashptr, SMB3_SIGNKEY_SIZE);
+ memcpy(key, hashptr, key_size);
smb3signkey_ret:
return rc;
}
+struct derivation {
+ struct kvec label;
+ struct kvec context;
+};
+
+struct derivation_triplet {
+ struct derivation signing;
+ struct derivation encryption;
+ struct derivation decryption;
+};
+
+static int
+generate_smb3signingkey(struct cifs_ses *ses,
+ const struct derivation_triplet *ptriplet)
+{
+ int rc;
+
+ rc = generate_key(ses, ptriplet->signing.label,
+ ptriplet->signing.context, ses->smb3signingkey,
+ SMB3_SIGN_KEY_SIZE);
+ if (rc)
+ return rc;
+
+ rc = generate_key(ses, ptriplet->encryption.label,
+ ptriplet->encryption.context, ses->smb3encryptionkey,
+ SMB3_SIGN_KEY_SIZE);
+ if (rc)
+ return rc;
+
+ return generate_key(ses, ptriplet->decryption.label,
+ ptriplet->decryption.context,
+ ses->smb3decryptionkey, SMB3_SIGN_KEY_SIZE);
+}
+
+int
+generate_smb30signingkey(struct cifs_ses *ses)
+
+{
+ struct derivation_triplet triplet;
+ struct derivation *d;
+
+ d = &triplet.signing;
+ d->label.iov_base = "SMB2AESCMAC";
+ d->label.iov_len = 12;
+ d->context.iov_base = "SmbSign";
+ d->context.iov_len = 8;
+
+ d = &triplet.encryption;
+ d->label.iov_base = "SMB2AESCCM";
+ d->label.iov_len = 11;
+ d->context.iov_base = "ServerIn ";
+ d->context.iov_len = 10;
+
+ d = &triplet.decryption;
+ d->label.iov_base = "SMB2AESCCM";
+ d->label.iov_len = 11;
+ d->context.iov_base = "ServerOut";
+ d->context.iov_len = 10;
+
+ return generate_smb3signingkey(ses, &triplet);
+}
+
+int
+generate_smb311signingkey(struct cifs_ses *ses)
+
+{
+ struct derivation_triplet triplet;
+ struct derivation *d;
+
+ d = &triplet.signing;
+ d->label.iov_base = "SMB2AESCMAC";
+ d->label.iov_len = 12;
+ d->context.iov_base = "SmbSign";
+ d->context.iov_len = 8;
+
+ d = &triplet.encryption;
+ d->label.iov_base = "SMB2AESCCM";
+ d->label.iov_len = 11;
+ d->context.iov_base = "ServerIn ";
+ d->context.iov_len = 10;
+
+ d = &triplet.decryption;
+ d->label.iov_base = "SMB2AESCCM";
+ d->label.iov_len = 11;
+ d->context.iov_base = "ServerOut";
+ d->context.iov_len = 10;
+
+ return generate_smb3signingkey(ses, &triplet);
+}
+
int
smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
{
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index ff9e1f8b1..f5dc2f0df 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -190,8 +190,8 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
#endif /* CONFIG_CIFS_ACL */
} else {
int temp;
- temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
- strlen(POSIX_ACL_XATTR_ACCESS));
+ temp = strncmp(ea_name, XATTR_NAME_POSIX_ACL_ACCESS,
+ strlen(XATTR_NAME_POSIX_ACL_ACCESS));
if (temp == 0) {
#ifdef CONFIG_CIFS_POSIX
if (sb->s_flags & MS_POSIXACL)
@@ -203,8 +203,8 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
#else
cifs_dbg(FYI, "set POSIX ACL not supported\n");
#endif
- } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT,
- strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) {
+ } else if (strncmp(ea_name, XATTR_NAME_POSIX_ACL_DEFAULT,
+ strlen(XATTR_NAME_POSIX_ACL_DEFAULT)) == 0) {
#ifdef CONFIG_CIFS_POSIX
if (sb->s_flags & MS_POSIXACL)
rc = CIFSSMBSetPosixACL(xid, pTcon, full_path,
@@ -292,8 +292,8 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon,
full_path, ea_name, ea_value, buf_size,
cifs_sb->local_nls, cifs_remap(cifs_sb));
- } else if (strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
- strlen(POSIX_ACL_XATTR_ACCESS)) == 0) {
+ } else if (strncmp(ea_name, XATTR_NAME_POSIX_ACL_ACCESS,
+ strlen(XATTR_NAME_POSIX_ACL_ACCESS)) == 0) {
#ifdef CONFIG_CIFS_POSIX
if (sb->s_flags & MS_POSIXACL)
rc = CIFSSMBGetPosixACL(xid, pTcon, full_path,
@@ -303,8 +303,8 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
#else
cifs_dbg(FYI, "Query POSIX ACL not supported yet\n");
#endif /* CONFIG_CIFS_POSIX */
- } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT,
- strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) {
+ } else if (strncmp(ea_name, XATTR_NAME_POSIX_ACL_DEFAULT,
+ strlen(XATTR_NAME_POSIX_ACL_DEFAULT)) == 0) {
#ifdef CONFIG_CIFS_POSIX
if (sb->s_flags & MS_POSIXACL)
rc = CIFSSMBGetPosixACL(xid, pTcon, full_path,
diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c
index 7740b1c87..1bfb7ba4e 100644
--- a/fs/coda/cnode.c
+++ b/fs/coda/cnode.c
@@ -8,6 +8,7 @@
#include <linux/coda.h>
#include <linux/coda_psdev.h>
+#include <linux/pagemap.h>
#include "coda_linux.h"
static inline int coda_fideq(struct CodaFid *fid1, struct CodaFid *fid2)
@@ -17,8 +18,7 @@ static inline int coda_fideq(struct CodaFid *fid1, struct CodaFid *fid2)
static const struct inode_operations coda_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.setattr = coda_setattr,
};
@@ -35,6 +35,7 @@ static void coda_fill_inode(struct inode *inode, struct coda_vattr *attr)
inode->i_fop = &coda_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &coda_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_data.a_ops = &coda_symlink_aops;
inode->i_mapping = &inode->i_data;
} else
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index f829fe963..5104d84c4 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -72,8 +72,7 @@ void coda_sysctl_clean(void);
} while (0)
-#define CODA_FREE(ptr,size) \
- do { if (size < PAGE_SIZE) kfree((ptr)); else vfree((ptr)); } while (0)
+#define CODA_FREE(ptr, size) kvfree((ptr))
/* inode to cnode access functions */
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index fda9f4311..42e731b8c 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -427,13 +427,13 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx)
if (host_file->f_op->iterate) {
struct inode *host_inode = file_inode(host_file);
- mutex_lock(&host_inode->i_mutex);
+ inode_lock(host_inode);
ret = -ENOENT;
if (!IS_DEADDIR(host_inode)) {
ret = host_file->f_op->iterate(host_file, ctx);
file_accessed(host_file);
}
- mutex_unlock(&host_inode->i_mutex);
+ inode_unlock(host_inode);
return ret;
}
/* Venus: we must read Venus dirents from a file */
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 1da3805f3..f47c74838 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -71,12 +71,12 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
host_file = cfi->cfi_container;
file_start_write(host_file);
- mutex_lock(&coda_inode->i_mutex);
+ inode_lock(coda_inode);
ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos);
coda_inode->i_size = file_inode(host_file)->i_size;
coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC;
- mutex_unlock(&coda_inode->i_mutex);
+ inode_unlock(coda_inode);
file_end_write(host_file);
return ret;
}
@@ -203,7 +203,7 @@ int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync)
err = filemap_write_and_wait_range(coda_inode->i_mapping, start, end);
if (err)
return err;
- mutex_lock(&coda_inode->i_mutex);
+ inode_lock(coda_inode);
cfi = CODA_FTOC(coda_file);
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
@@ -212,7 +212,7 @@ int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync)
err = vfs_fsync(host_file, datasync);
if (!err && !datasync)
err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode));
- mutex_unlock(&coda_inode->i_mutex);
+ inode_unlock(coda_inode);
return err;
}
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index cac1390b8..57e81cbba 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -74,9 +74,9 @@ static void init_once(void *foo)
int __init coda_init_inodecache(void)
{
coda_inode_cachep = kmem_cache_create("coda_inode_cache",
- sizeof(struct coda_inode_info),
- 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
- init_once);
+ sizeof(struct coda_inode_info), 0,
+ SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
+ SLAB_ACCOUNT, init_once);
if (coda_inode_cachep == NULL)
return -ENOMEM;
return 0;
diff --git a/fs/coda/symlink.c b/fs/coda/symlink.c
index ab94ef63c..03736e20d 100644
--- a/fs/coda/symlink.c
+++ b/fs/coda/symlink.c
@@ -26,7 +26,7 @@ static int coda_symlink_filler(struct file *file, struct page *page)
int error;
struct coda_inode_info *cii;
unsigned int len = PAGE_SIZE;
- char *p = kmap(page);
+ char *p = page_address(page);
cii = ITOC(inode);
@@ -34,13 +34,11 @@ static int coda_symlink_filler(struct file *file, struct page *page)
if (error)
goto fail;
SetPageUptodate(page);
- kunmap(page);
unlock_page(page);
return 0;
fail:
SetPageError(page);
- kunmap(page);
unlock_page(page);
return error;
}
diff --git a/fs/compat.c b/fs/compat.c
index 6fd272d45..a71936a3f 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -792,7 +792,7 @@ COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name,
const void __user *, data)
{
char *kernel_type;
- unsigned long data_page;
+ void *options;
char *kernel_dev;
int retval;
@@ -806,26 +806,25 @@ COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name,
if (IS_ERR(kernel_dev))
goto out1;
- retval = copy_mount_options(data, &data_page);
- if (retval < 0)
+ options = copy_mount_options(data);
+ retval = PTR_ERR(options);
+ if (IS_ERR(options))
goto out2;
- retval = -EINVAL;
-
- if (kernel_type && data_page) {
+ if (kernel_type && options) {
if (!strcmp(kernel_type, NCPFS_NAME)) {
- do_ncp_super_data_conv((void *)data_page);
+ do_ncp_super_data_conv(options);
} else if (!strcmp(kernel_type, NFS4_NAME)) {
- if (do_nfs4_super_data_conv((void *) data_page))
+ retval = -EINVAL;
+ if (do_nfs4_super_data_conv(options))
goto out3;
}
}
- retval = do_mount(kernel_dev, dir_name, kernel_type,
- flags, (void*)data_page);
+ retval = do_mount(kernel_dev, dir_name, kernel_type, flags, options);
out3:
- free_page(data_page);
+ kfree(options);
out2:
kfree(kernel_dev);
out1:
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index dcf26537c..6402eaf8a 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -58,6 +58,8 @@
#include <linux/atalk.h>
#include <linux/gfp.h>
+#include "internal.h"
+
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_sock.h>
#include <net/bluetooth/rfcomm.h>
@@ -115,19 +117,38 @@
#include <asm/fbio.h>
#endif
-static int w_long(unsigned int fd, unsigned int cmd,
- compat_ulong_t __user *argp)
+#define convert_in_user(srcptr, dstptr) \
+({ \
+ typeof(*srcptr) val; \
+ \
+ get_user(val, srcptr) || put_user(val, dstptr); \
+})
+
+static int do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
- mm_segment_t old_fs = get_fs();
int err;
- unsigned long val;
- set_fs (KERNEL_DS);
- err = sys_ioctl(fd, cmd, (unsigned long)&val);
- set_fs (old_fs);
- if (!err && put_user(val, argp))
+ err = security_file_ioctl(file, cmd, arg);
+ if (err)
+ return err;
+
+ return vfs_ioctl(file, cmd, arg);
+}
+
+static int w_long(struct file *file,
+ unsigned int cmd, compat_ulong_t __user *argp)
+{
+ int err;
+ unsigned long __user *valp = compat_alloc_user_space(sizeof(*valp));
+
+ if (valp == NULL)
return -EFAULT;
- return err;
+ err = do_ioctl(file, cmd, (unsigned long)valp);
+ if (err)
+ return err;
+ if (convert_in_user(valp, argp))
+ return -EFAULT;
+ return 0;
}
struct compat_video_event {
@@ -139,23 +160,23 @@ struct compat_video_event {
} u;
};
-static int do_video_get_event(unsigned int fd, unsigned int cmd,
- struct compat_video_event __user *up)
+static int do_video_get_event(struct file *file,
+ unsigned int cmd, struct compat_video_event __user *up)
{
- struct video_event kevent;
- mm_segment_t old_fs = get_fs();
+ struct video_event __user *kevent =
+ compat_alloc_user_space(sizeof(*kevent));
int err;
- set_fs(KERNEL_DS);
- err = sys_ioctl(fd, cmd, (unsigned long) &kevent);
- set_fs(old_fs);
+ if (kevent == NULL)
+ return -EFAULT;
+ err = do_ioctl(file, cmd, (unsigned long)kevent);
if (!err) {
- err = put_user(kevent.type, &up->type);
- err |= put_user(kevent.timestamp, &up->timestamp);
- err |= put_user(kevent.u.size.w, &up->u.size.w);
- err |= put_user(kevent.u.size.h, &up->u.size.h);
- err |= put_user(kevent.u.size.aspect_ratio,
+ err = convert_in_user(&kevent->type, &up->type);
+ err |= convert_in_user(&kevent->timestamp, &up->timestamp);
+ err |= convert_in_user(&kevent->u.size.w, &up->u.size.w);
+ err |= convert_in_user(&kevent->u.size.h, &up->u.size.h);
+ err |= convert_in_user(&kevent->u.size.aspect_ratio,
&up->u.size.aspect_ratio);
if (err)
err = -EFAULT;
@@ -169,8 +190,8 @@ struct compat_video_still_picture {
int32_t size;
};
-static int do_video_stillpicture(unsigned int fd, unsigned int cmd,
- struct compat_video_still_picture __user *up)
+static int do_video_stillpicture(struct file *file,
+ unsigned int cmd, struct compat_video_still_picture __user *up)
{
struct video_still_picture __user *up_native;
compat_uptr_t fp;
@@ -190,7 +211,7 @@ static int do_video_stillpicture(unsigned int fd, unsigned int cmd,
if (err)
return -EFAULT;
- err = sys_ioctl(fd, cmd, (unsigned long) up_native);
+ err = do_ioctl(file, cmd, (unsigned long) up_native);
return err;
}
@@ -200,8 +221,8 @@ struct compat_video_spu_palette {
compat_uptr_t palette;
};
-static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd,
- struct compat_video_spu_palette __user *up)
+static int do_video_set_spu_palette(struct file *file,
+ unsigned int cmd, struct compat_video_spu_palette __user *up)
{
struct video_spu_palette __user *up_native;
compat_uptr_t palp;
@@ -218,7 +239,7 @@ static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd,
if (err)
return -EFAULT;
- err = sys_ioctl(fd, cmd, (unsigned long) up_native);
+ err = do_ioctl(file, cmd, (unsigned long) up_native);
return err;
}
@@ -276,7 +297,7 @@ static int sg_build_iovec(sg_io_hdr_t __user *sgio, void __user *dxferp, u16 iov
return 0;
}
-static int sg_ioctl_trans(unsigned int fd, unsigned int cmd,
+static int sg_ioctl_trans(struct file *file, unsigned int cmd,
sg_io_hdr32_t __user *sgio32)
{
sg_io_hdr_t __user *sgio;
@@ -289,7 +310,7 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd,
if (get_user(interface_id, &sgio32->interface_id))
return -EFAULT;
if (interface_id != 'S')
- return sys_ioctl(fd, cmd, (unsigned long)sgio32);
+ return do_ioctl(file, cmd, (unsigned long)sgio32);
if (get_user(iovec_count, &sgio32->iovec_count))
return -EFAULT;
@@ -349,7 +370,7 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd,
if (put_user(compat_ptr(data), &sgio->usr_ptr))
return -EFAULT;
- err = sys_ioctl(fd, cmd, (unsigned long) sgio);
+ err = do_ioctl(file, cmd, (unsigned long) sgio);
if (err >= 0) {
void __user *datap;
@@ -380,13 +401,13 @@ struct compat_sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */
int unused;
};
-static int sg_grt_trans(unsigned int fd, unsigned int cmd, struct
- compat_sg_req_info __user *o)
+static int sg_grt_trans(struct file *file,
+ unsigned int cmd, struct compat_sg_req_info __user *o)
{
int err, i;
sg_req_info_t __user *r;
r = compat_alloc_user_space(sizeof(sg_req_info_t)*SG_MAX_QUEUE);
- err = sys_ioctl(fd,cmd,(unsigned long)r);
+ err = do_ioctl(file, cmd, (unsigned long)r);
if (err < 0)
return err;
for (i = 0; i < SG_MAX_QUEUE; i++) {
@@ -412,8 +433,8 @@ struct sock_fprog32 {
#define PPPIOCSPASS32 _IOW('t', 71, struct sock_fprog32)
#define PPPIOCSACTIVE32 _IOW('t', 70, struct sock_fprog32)
-static int ppp_sock_fprog_ioctl_trans(unsigned int fd, unsigned int cmd,
- struct sock_fprog32 __user *u_fprog32)
+static int ppp_sock_fprog_ioctl_trans(struct file *file,
+ unsigned int cmd, struct sock_fprog32 __user *u_fprog32)
{
struct sock_fprog __user *u_fprog64 = compat_alloc_user_space(sizeof(struct sock_fprog));
void __user *fptr64;
@@ -435,7 +456,7 @@ static int ppp_sock_fprog_ioctl_trans(unsigned int fd, unsigned int cmd,
else
cmd = PPPIOCSACTIVE;
- return sys_ioctl(fd, cmd, (unsigned long) u_fprog64);
+ return do_ioctl(file, cmd, (unsigned long) u_fprog64);
}
struct ppp_option_data32 {
@@ -451,7 +472,7 @@ struct ppp_idle32 {
};
#define PPPIOCGIDLE32 _IOR('t', 63, struct ppp_idle32)
-static int ppp_gidle(unsigned int fd, unsigned int cmd,
+static int ppp_gidle(struct file *file, unsigned int cmd,
struct ppp_idle32 __user *idle32)
{
struct ppp_idle __user *idle;
@@ -460,7 +481,7 @@ static int ppp_gidle(unsigned int fd, unsigned int cmd,
idle = compat_alloc_user_space(sizeof(*idle));
- err = sys_ioctl(fd, PPPIOCGIDLE, (unsigned long) idle);
+ err = do_ioctl(file, PPPIOCGIDLE, (unsigned long) idle);
if (!err) {
if (get_user(xmit, &idle->xmit_idle) ||
@@ -472,7 +493,7 @@ static int ppp_gidle(unsigned int fd, unsigned int cmd,
return err;
}
-static int ppp_scompress(unsigned int fd, unsigned int cmd,
+static int ppp_scompress(struct file *file, unsigned int cmd,
struct ppp_option_data32 __user *odata32)
{
struct ppp_option_data __user *odata;
@@ -492,7 +513,7 @@ static int ppp_scompress(unsigned int fd, unsigned int cmd,
sizeof(__u32) + sizeof(int)))
return -EFAULT;
- return sys_ioctl(fd, PPPIOCSCOMPRESS, (unsigned long) odata);
+ return do_ioctl(file, PPPIOCSCOMPRESS, (unsigned long) odata);
}
#ifdef CONFIG_BLOCK
@@ -512,12 +533,13 @@ struct mtpos32 {
};
#define MTIOCPOS32 _IOR('m', 3, struct mtpos32)
-static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, void __user *argp)
+static int mt_ioctl_trans(struct file *file,
+ unsigned int cmd, void __user *argp)
{
- mm_segment_t old_fs = get_fs();
- struct mtget get;
+ /* NULL initialization to make gcc shut up */
+ struct mtget __user *get = NULL;
struct mtget32 __user *umget32;
- struct mtpos pos;
+ struct mtpos __user *pos = NULL;
struct mtpos32 __user *upos32;
unsigned long kcmd;
void *karg;
@@ -526,32 +548,34 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, void __user *argp)
switch(cmd) {
case MTIOCPOS32:
kcmd = MTIOCPOS;
- karg = &pos;
+ pos = compat_alloc_user_space(sizeof(*pos));
+ karg = pos;
break;
default: /* MTIOCGET32 */
kcmd = MTIOCGET;
- karg = &get;
+ get = compat_alloc_user_space(sizeof(*get));
+ karg = get;
break;
}
- set_fs (KERNEL_DS);
- err = sys_ioctl (fd, kcmd, (unsigned long)karg);
- set_fs (old_fs);
+ if (karg == NULL)
+ return -EFAULT;
+ err = do_ioctl(file, kcmd, (unsigned long)karg);
if (err)
return err;
switch (cmd) {
case MTIOCPOS32:
upos32 = argp;
- err = __put_user(pos.mt_blkno, &upos32->mt_blkno);
+ err = convert_in_user(&pos->mt_blkno, &upos32->mt_blkno);
break;
case MTIOCGET32:
umget32 = argp;
- err = __put_user(get.mt_type, &umget32->mt_type);
- err |= __put_user(get.mt_resid, &umget32->mt_resid);
- err |= __put_user(get.mt_dsreg, &umget32->mt_dsreg);
- err |= __put_user(get.mt_gstat, &umget32->mt_gstat);
- err |= __put_user(get.mt_erreg, &umget32->mt_erreg);
- err |= __put_user(get.mt_fileno, &umget32->mt_fileno);
- err |= __put_user(get.mt_blkno, &umget32->mt_blkno);
+ err = convert_in_user(&get->mt_type, &umget32->mt_type);
+ err |= convert_in_user(&get->mt_resid, &umget32->mt_resid);
+ err |= convert_in_user(&get->mt_dsreg, &umget32->mt_dsreg);
+ err |= convert_in_user(&get->mt_gstat, &umget32->mt_gstat);
+ err |= convert_in_user(&get->mt_erreg, &umget32->mt_erreg);
+ err |= convert_in_user(&get->mt_fileno, &umget32->mt_fileno);
+ err |= convert_in_user(&get->mt_blkno, &umget32->mt_blkno);
break;
}
return err ? -EFAULT: 0;
@@ -605,42 +629,41 @@ struct serial_struct32 {
compat_int_t reserved[1];
};
-static int serial_struct_ioctl(unsigned fd, unsigned cmd,
- struct serial_struct32 __user *ss32)
+static int serial_struct_ioctl(struct file *file,
+ unsigned cmd, struct serial_struct32 __user *ss32)
{
typedef struct serial_struct32 SS32;
int err;
- struct serial_struct ss;
- mm_segment_t oldseg = get_fs();
+ struct serial_struct __user *ss = compat_alloc_user_space(sizeof(*ss));
__u32 udata;
unsigned int base;
+ unsigned char *iomem_base;
+ if (ss == NULL)
+ return -EFAULT;
if (cmd == TIOCSSERIAL) {
- if (!access_ok(VERIFY_READ, ss32, sizeof(SS32)))
- return -EFAULT;
- if (__copy_from_user(&ss, ss32, offsetof(SS32, iomem_base)))
- return -EFAULT;
- if (__get_user(udata, &ss32->iomem_base))
+ if (copy_in_user(ss, ss32, offsetof(SS32, iomem_base)) ||
+ get_user(udata, &ss32->iomem_base))
return -EFAULT;
- ss.iomem_base = compat_ptr(udata);
- if (__get_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift) ||
- __get_user(ss.port_high, &ss32->port_high))
+ iomem_base = compat_ptr(udata);
+ if (put_user(iomem_base, &ss->iomem_base) ||
+ convert_in_user(&ss32->iomem_reg_shift,
+ &ss->iomem_reg_shift) ||
+ convert_in_user(&ss32->port_high, &ss->port_high) ||
+ put_user(0UL, &ss->iomap_base))
return -EFAULT;
- ss.iomap_base = 0UL;
}
- set_fs(KERNEL_DS);
- err = sys_ioctl(fd,cmd,(unsigned long)(&ss));
- set_fs(oldseg);
+ err = do_ioctl(file, cmd, (unsigned long)ss);
if (cmd == TIOCGSERIAL && err >= 0) {
- if (!access_ok(VERIFY_WRITE, ss32, sizeof(SS32)))
- return -EFAULT;
- if (__copy_to_user(ss32,&ss,offsetof(SS32,iomem_base)))
+ if (copy_in_user(ss32, ss, offsetof(SS32, iomem_base)) ||
+ get_user(iomem_base, &ss->iomem_base))
return -EFAULT;
- base = (unsigned long)ss.iomem_base >> 32 ?
- 0xffffffff : (unsigned)(unsigned long)ss.iomem_base;
- if (__put_user(base, &ss32->iomem_base) ||
- __put_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift) ||
- __put_user(ss.port_high, &ss32->port_high))
+ base = (unsigned long)iomem_base >> 32 ?
+ 0xffffffff : (unsigned)(unsigned long)iomem_base;
+ if (put_user(base, &ss32->iomem_base) ||
+ convert_in_user(&ss->iomem_reg_shift,
+ &ss32->iomem_reg_shift) ||
+ convert_in_user(&ss->port_high, &ss32->port_high))
return -EFAULT;
}
return err;
@@ -674,8 +697,8 @@ struct i2c_rdwr_aligned {
struct i2c_msg msgs[0];
};
-static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd,
- struct i2c_rdwr_ioctl_data32 __user *udata)
+static int do_i2c_rdwr_ioctl(struct file *file,
+ unsigned int cmd, struct i2c_rdwr_ioctl_data32 __user *udata)
{
struct i2c_rdwr_aligned __user *tdata;
struct i2c_msg __user *tmsgs;
@@ -708,11 +731,11 @@ static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd,
put_user(compat_ptr(datap), &tmsgs[i].buf))
return -EFAULT;
}
- return sys_ioctl(fd, cmd, (unsigned long)tdata);
+ return do_ioctl(file, cmd, (unsigned long)tdata);
}
-static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd,
- struct i2c_smbus_ioctl_data32 __user *udata)
+static int do_i2c_smbus_ioctl(struct file *file,
+ unsigned int cmd, struct i2c_smbus_ioctl_data32 __user *udata)
{
struct i2c_smbus_ioctl_data __user *tdata;
compat_caddr_t datap;
@@ -734,7 +757,7 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd,
__put_user(compat_ptr(datap), &tdata->data))
return -EFAULT;
- return sys_ioctl(fd, cmd, (unsigned long)tdata);
+ return do_ioctl(file, cmd, (unsigned long)tdata);
}
#define RTC_IRQP_READ32 _IOR('p', 0x0b, compat_ulong_t)
@@ -742,29 +765,27 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd,
#define RTC_EPOCH_READ32 _IOR('p', 0x0d, compat_ulong_t)
#define RTC_EPOCH_SET32 _IOW('p', 0x0e, compat_ulong_t)
-static int rtc_ioctl(unsigned fd, unsigned cmd, void __user *argp)
+static int rtc_ioctl(struct file *file,
+ unsigned cmd, void __user *argp)
{
- mm_segment_t oldfs = get_fs();
- compat_ulong_t val32;
- unsigned long kval;
+ unsigned long __user *valp = compat_alloc_user_space(sizeof(*valp));
int ret;
+ if (valp == NULL)
+ return -EFAULT;
switch (cmd) {
case RTC_IRQP_READ32:
case RTC_EPOCH_READ32:
- set_fs(KERNEL_DS);
- ret = sys_ioctl(fd, (cmd == RTC_IRQP_READ32) ?
+ ret = do_ioctl(file, (cmd == RTC_IRQP_READ32) ?
RTC_IRQP_READ : RTC_EPOCH_READ,
- (unsigned long)&kval);
- set_fs(oldfs);
+ (unsigned long)valp);
if (ret)
return ret;
- val32 = kval;
- return put_user(val32, (unsigned int __user *)argp);
+ return convert_in_user(valp, (unsigned int __user *)argp);
case RTC_IRQP_SET32:
- return sys_ioctl(fd, RTC_IRQP_SET, (unsigned long)argp);
+ return do_ioctl(file, RTC_IRQP_SET, (unsigned long)argp);
case RTC_EPOCH_SET32:
- return sys_ioctl(fd, RTC_EPOCH_SET, (unsigned long)argp);
+ return do_ioctl(file, RTC_EPOCH_SET, (unsigned long)argp);
}
return -ENOIOCTLCMD;
@@ -1240,6 +1261,9 @@ COMPATIBLE_IOCTL(HCIUNBLOCKADDR)
COMPATIBLE_IOCTL(HCIINQUIRY)
COMPATIBLE_IOCTL(HCIUARTSETPROTO)
COMPATIBLE_IOCTL(HCIUARTGETPROTO)
+COMPATIBLE_IOCTL(HCIUARTGETDEVICE)
+COMPATIBLE_IOCTL(HCIUARTSETFLAGS)
+COMPATIBLE_IOCTL(HCIUARTGETFLAGS)
COMPATIBLE_IOCTL(RFCOMMCREATEDEV)
COMPATIBLE_IOCTL(RFCOMMRELEASEDEV)
COMPATIBLE_IOCTL(RFCOMMGETDEVLIST)
@@ -1284,12 +1308,6 @@ COMPATIBLE_IOCTL(PCIIOC_CONTROLLER)
COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_IO)
COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_MEM)
COMPATIBLE_IOCTL(PCIIOC_WRITE_COMBINE)
-/* NBD */
-COMPATIBLE_IOCTL(NBD_DO_IT)
-COMPATIBLE_IOCTL(NBD_CLEAR_SOCK)
-COMPATIBLE_IOCTL(NBD_CLEAR_QUE)
-COMPATIBLE_IOCTL(NBD_PRINT_DEBUG)
-COMPATIBLE_IOCTL(NBD_DISCONNECT)
/* i2c */
COMPATIBLE_IOCTL(I2C_SLAVE)
COMPATIBLE_IOCTL(I2C_SLAVE_FORCE)
@@ -1436,53 +1454,53 @@ IGNORE_IOCTL(FBIOGCURSOR32)
* a compat_ioctl operation in the place that handles the
* ioctl for the native case.
*/
-static long do_ioctl_trans(int fd, unsigned int cmd,
+static long do_ioctl_trans(unsigned int cmd,
unsigned long arg, struct file *file)
{
void __user *argp = compat_ptr(arg);
switch (cmd) {
case PPPIOCGIDLE32:
- return ppp_gidle(fd, cmd, argp);
+ return ppp_gidle(file, cmd, argp);
case PPPIOCSCOMPRESS32:
- return ppp_scompress(fd, cmd, argp);
+ return ppp_scompress(file, cmd, argp);
case PPPIOCSPASS32:
case PPPIOCSACTIVE32:
- return ppp_sock_fprog_ioctl_trans(fd, cmd, argp);
+ return ppp_sock_fprog_ioctl_trans(file, cmd, argp);
#ifdef CONFIG_BLOCK
case SG_IO:
- return sg_ioctl_trans(fd, cmd, argp);
+ return sg_ioctl_trans(file, cmd, argp);
case SG_GET_REQUEST_TABLE:
- return sg_grt_trans(fd, cmd, argp);
+ return sg_grt_trans(file, cmd, argp);
case MTIOCGET32:
case MTIOCPOS32:
- return mt_ioctl_trans(fd, cmd, argp);
+ return mt_ioctl_trans(file, cmd, argp);
#endif
/* Serial */
case TIOCGSERIAL:
case TIOCSSERIAL:
- return serial_struct_ioctl(fd, cmd, argp);
+ return serial_struct_ioctl(file, cmd, argp);
/* i2c */
case I2C_FUNCS:
- return w_long(fd, cmd, argp);
+ return w_long(file, cmd, argp);
case I2C_RDWR:
- return do_i2c_rdwr_ioctl(fd, cmd, argp);
+ return do_i2c_rdwr_ioctl(file, cmd, argp);
case I2C_SMBUS:
- return do_i2c_smbus_ioctl(fd, cmd, argp);
+ return do_i2c_smbus_ioctl(file, cmd, argp);
/* Not implemented in the native kernel */
case RTC_IRQP_READ32:
case RTC_IRQP_SET32:
case RTC_EPOCH_READ32:
case RTC_EPOCH_SET32:
- return rtc_ioctl(fd, cmd, argp);
+ return rtc_ioctl(file, cmd, argp);
/* dvb */
case VIDEO_GET_EVENT:
- return do_video_get_event(fd, cmd, argp);
+ return do_video_get_event(file, cmd, argp);
case VIDEO_STILLPICTURE:
- return do_video_stillpicture(fd, cmd, argp);
+ return do_video_stillpicture(file, cmd, argp);
case VIDEO_SET_SPU_PALETTE:
- return do_video_set_spu_palette(fd, cmd, argp);
+ return do_video_set_spu_palette(file, cmd, argp);
}
/*
@@ -1508,12 +1526,7 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
case KDSKBMETA:
case KDSKBLED:
case KDSETLED:
- /* NBD */
- case NBD_SET_SOCK:
- case NBD_SET_BLKSIZE:
- case NBD_SET_SIZE:
- case NBD_SET_SIZE_BLOCKS:
- return do_vfs_ioctl(file, fd, cmd, arg);
+ return vfs_ioctl(file, cmd, arg);
}
return -ENOIOCTLCMD;
@@ -1580,6 +1593,11 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
goto out_fput;
#endif
+ case FICLONE:
+ case FICLONERANGE:
+ case FIDEDUPERANGE:
+ goto do_ioctl;
+
case FIBMAP:
case FIGETBSZ:
case FIONREAD:
@@ -1602,7 +1620,7 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
if (compat_ioctl_check_table(XFORM(cmd)))
goto found_handler;
- error = do_ioctl_trans(fd, cmd, arg, f.file);
+ error = do_ioctl_trans(cmd, arg, f.file);
if (error == -ENOIOCTLCMD)
error = -ENOTTY;
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index b65d1ef53..ccc31fa6f 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -53,13 +53,14 @@ struct configfs_dirent {
#define CONFIGFS_ROOT 0x0001
#define CONFIGFS_DIR 0x0002
#define CONFIGFS_ITEM_ATTR 0x0004
+#define CONFIGFS_ITEM_BIN_ATTR 0x0008
#define CONFIGFS_ITEM_LINK 0x0020
#define CONFIGFS_USET_DIR 0x0040
#define CONFIGFS_USET_DEFAULT 0x0080
#define CONFIGFS_USET_DROPPING 0x0100
#define CONFIGFS_USET_IN_MKDIR 0x0200
#define CONFIGFS_USET_CREATING 0x0400
-#define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR)
+#define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR | CONFIGFS_ITEM_BIN_ATTR)
extern struct mutex configfs_symlink_mutex;
extern spinlock_t configfs_dirent_lock;
@@ -72,6 +73,8 @@ extern struct inode * configfs_new_inode(umode_t mode, struct configfs_dirent *,
extern int configfs_create(struct dentry *, umode_t mode, void (*init)(struct inode *));
extern int configfs_create_file(struct config_item *, const struct configfs_attribute *);
+extern int configfs_create_bin_file(struct config_item *,
+ const struct configfs_bin_attribute *);
extern int configfs_make_dirent(struct configfs_dirent *,
struct dentry *, void *, umode_t, int);
extern int configfs_dirent_is_ready(struct configfs_dirent *);
@@ -88,7 +91,7 @@ extern void configfs_release_fs(void);
extern struct rw_semaphore configfs_rename_sem;
extern const struct file_operations configfs_dir_operations;
extern const struct file_operations configfs_file_operations;
-extern const struct file_operations bin_fops;
+extern const struct file_operations configfs_bin_file_operations;
extern const struct inode_operations configfs_dir_inode_operations;
extern const struct inode_operations configfs_root_inode_operations;
extern const struct inode_operations configfs_symlink_inode_operations;
@@ -119,6 +122,13 @@ static inline struct configfs_attribute * to_attr(struct dentry * dentry)
return ((struct configfs_attribute *) sd->s_element);
}
+/*
+ * Map an attribute dentry to the configfs_bin_attribute that embeds (as
+ * member cb_attr) the configfs_attribute returned by to_attr().
+ */
+static inline struct configfs_bin_attribute *to_bin_attr(struct dentry *dentry)
+{
+	return container_of(to_attr(dentry), struct configfs_bin_attribute,
+			    cb_attr);
+}
+
static inline struct config_item *configfs_get_config_item(struct dentry *dentry)
{
struct config_item * item = NULL;
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index a7a1b218f..f419519ec 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -255,6 +255,12 @@ static void configfs_init_file(struct inode * inode)
inode->i_fop = &configfs_file_operations;
}
+/* Inode init callback for binary attribute files: binary fops, zero size. */
+static void configfs_init_bin_file(struct inode *inode)
+{
+	inode->i_fop = &configfs_bin_file_operations;
+	inode->i_size = 0;
+}
+
static void init_symlink(struct inode * inode)
{
inode->i_op = &configfs_symlink_inode_operations;
@@ -423,7 +429,9 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
spin_unlock(&configfs_dirent_lock);
error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG,
- configfs_init_file);
+ (sd->s_type & CONFIGFS_ITEM_BIN_ATTR) ?
+ configfs_init_bin_file :
+ configfs_init_file);
if (error) {
configfs_put(sd);
return error;
@@ -583,6 +591,7 @@ static int populate_attrs(struct config_item *item)
{
struct config_item_type *t = item->ci_type;
struct configfs_attribute *attr;
+ struct configfs_bin_attribute *bin_attr;
int error = 0;
int i;
@@ -594,6 +603,13 @@ static int populate_attrs(struct config_item *item)
break;
}
}
+ if (t->ct_bin_attrs) {
+ for (i = 0; (bin_attr = t->ct_bin_attrs[i]) != NULL; i++) {
+ error = configfs_create_bin_file(item, bin_attr);
+ if (error)
+ break;
+ }
+ }
if (error)
detach_attrs(item);
@@ -624,13 +640,13 @@ static void detach_groups(struct config_group *group)
child = sd->s_dentry;
- mutex_lock(&d_inode(child)->i_mutex);
+ inode_lock(d_inode(child));
configfs_detach_group(sd->s_element);
d_inode(child)->i_flags |= S_DEAD;
dont_mount(child);
- mutex_unlock(&d_inode(child)->i_mutex);
+ inode_unlock(d_inode(child));
d_delete(child);
dput(child);
@@ -818,11 +834,11 @@ static int configfs_attach_item(struct config_item *parent_item,
* the VFS may already have hit and used them. Thus,
* we must lock them as rmdir() would.
*/
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
configfs_remove_dir(item);
d_inode(dentry)->i_flags |= S_DEAD;
dont_mount(dentry);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
d_delete(dentry);
}
}
@@ -858,7 +874,7 @@ static int configfs_attach_group(struct config_item *parent_item,
* We must also lock the inode to remove it safely in case of
* error, as rmdir() would.
*/
- mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
configfs_adjust_dir_dirent_depth_before_populate(sd);
ret = populate_groups(to_config_group(item));
if (ret) {
@@ -867,7 +883,7 @@ static int configfs_attach_group(struct config_item *parent_item,
dont_mount(dentry);
}
configfs_adjust_dir_dirent_depth_after_populate(sd);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
if (ret)
d_delete(dentry);
}
@@ -1054,11 +1070,55 @@ out:
return ret;
}
+/*
+ * Core of the depend-item operation: under configfs_dirent_lock, run
+ * configfs_depend_prep() on the subsystem tree and, when it returns 0,
+ * bump the target's s_dependent_count.  Returns configfs_depend_prep()'s
+ * result.
+ */
+static int configfs_do_depend_item(struct dentry *subsys_dentry,
+				   struct config_item *target)
+{
+	struct configfs_dirent *target_sd;
+	int err;
+
+	spin_lock(&configfs_dirent_lock);
+
+	/* Scan the tree, return 0 if found */
+	err = configfs_depend_prep(subsys_dentry, target);
+	if (!err) {
+		/*
+		 * We are sure that the item is not about to be removed by
+		 * rmdir(), and not in the middle of attachment by mkdir().
+		 */
+		target_sd = target->ci_dentry->d_fsdata;
+		target_sd->s_dependent_count++;
+	}
+
+	spin_unlock(&configfs_dirent_lock);
+
+	return err;
+}
+
+/*
+ * Walk the children of @root_sd and return the first CONFIGFS_DIR dirent
+ * whose s_element is @subsys_item, or NULL when there is none.
+ */
+static inline struct configfs_dirent *
+configfs_find_subsys_dentry(struct configfs_dirent *root_sd,
+			    struct config_item *subsys_item)
+{
+	struct configfs_dirent *sd;
+
+	list_for_each_entry(sd, &root_sd->s_children, s_sibling) {
+		if (!(sd->s_type & CONFIGFS_DIR))
+			continue;
+		if (sd->s_element == subsys_item)
+			return sd;
+	}
+
+	return NULL;
+}
+
+
int configfs_depend_item(struct configfs_subsystem *subsys,
struct config_item *target)
{
int ret;
- struct configfs_dirent *p, *root_sd, *subsys_sd = NULL;
+ struct configfs_dirent *subsys_sd;
struct config_item *s_item = &subsys->su_group.cg_item;
struct dentry *root;
@@ -1075,43 +1135,19 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
* subsystem is really registered, and so we need to lock out
* configfs_[un]register_subsystem().
*/
- mutex_lock(&d_inode(root)->i_mutex);
-
- root_sd = root->d_fsdata;
-
- list_for_each_entry(p, &root_sd->s_children, s_sibling) {
- if (p->s_type & CONFIGFS_DIR) {
- if (p->s_element == s_item) {
- subsys_sd = p;
- break;
- }
- }
- }
+ inode_lock(d_inode(root));
+ subsys_sd = configfs_find_subsys_dentry(root->d_fsdata, s_item);
if (!subsys_sd) {
ret = -ENOENT;
goto out_unlock_fs;
}
/* Ok, now we can trust subsys/s_item */
+ ret = configfs_do_depend_item(subsys_sd->s_dentry, target);
- spin_lock(&configfs_dirent_lock);
- /* Scan the tree, return 0 if found */
- ret = configfs_depend_prep(subsys_sd->s_dentry, target);
- if (ret)
- goto out_unlock_dirent_lock;
-
- /*
- * We are sure that the item is not about to be removed by rmdir(), and
- * not in the middle of attachment by mkdir().
- */
- p = target->ci_dentry->d_fsdata;
- p->s_dependent_count += 1;
-
-out_unlock_dirent_lock:
- spin_unlock(&configfs_dirent_lock);
out_unlock_fs:
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
/*
* If we succeeded, the fs is pinned via other methods. If not,
@@ -1128,8 +1164,7 @@ EXPORT_SYMBOL(configfs_depend_item);
* configfs_depend_item() because we know that that the client driver is
* pinned, thus the subsystem is pinned, and therefore configfs is pinned.
*/
-void configfs_undepend_item(struct configfs_subsystem *subsys,
- struct config_item *target)
+void configfs_undepend_item(struct config_item *target)
{
struct configfs_dirent *sd;
@@ -1152,6 +1187,79 @@ void configfs_undepend_item(struct configfs_subsystem *subsys,
}
EXPORT_SYMBOL(configfs_undepend_item);
+/*
+ * caller_subsys is the caller's subsystem, not the target's. It is used to
+ * decide whether we must lock the root and check the subsystem or not. When
+ * we are in the same subsystem as our target there is no need to do locking,
+ * as we know that the subsystem is valid and is not unregistered during this
+ * function: we are called from a callback of one of its children and the VFS
+ * holds a lock on some inode. Otherwise we have to lock our root to ensure
+ * that the target's subsystem is not unregistered during this function.
+ *
+ * Returns -EINVAL when target is the configfs root, -ENOENT when the
+ * target's subsystem is not found under the root, otherwise the result of
+ * configfs_do_depend_item().
+ */
+int configfs_depend_item_unlocked(struct configfs_subsystem *caller_subsys,
+ struct config_item *target)
+{
+ struct configfs_subsystem *target_subsys;
+ struct config_group *root, *parent;
+ struct configfs_dirent *subsys_sd;
+ int ret = -ENOENT;
+
+ /* Disallow this function for configfs root */
+ if (configfs_is_root(target))
+ return -EINVAL;
+
+ parent = target->ci_group;
+ /*
+ * This may happen when someone is trying to depend on the root
+ * directory of some subsystem
+ */
+ if (configfs_is_root(&parent->cg_item)) {
+ target_subsys = to_configfs_subsystem(to_config_group(target));
+ root = parent;
+ } else {
+ target_subsys = parent->cg_subsys;
+ /* Find the configfs root as we may need it for locking */
+ for (root = parent; !configfs_is_root(&root->cg_item);
+ root = root->cg_item.ci_group)
+ ;
+ }
+
+ if (target_subsys != caller_subsys) {
+ /*
+ * We are in other configfs subsystem, so we have to do
+ * additional locking to prevent other subsystem from being
+ * unregistered
+ */
+ inode_lock(d_inode(root->cg_item.ci_dentry));
+
+ /*
+ * As we are trying to depend item from other subsystem
+ * we have to check if this subsystem is still registered
+ */
+ subsys_sd = configfs_find_subsys_dentry(
+ root->cg_item.ci_dentry->d_fsdata,
+ &target_subsys->su_group.cg_item);
+ /* not found: leave with ret still -ENOENT, dropping the root lock */
+ if (!subsys_sd)
+ goto out_root_unlock;
+ } else {
+ subsys_sd = target_subsys->su_group.cg_item.ci_dentry->d_fsdata;
+ }
+
+ /* Now we can execute core of depend item */
+ ret = configfs_do_depend_item(subsys_sd->s_dentry, target);
+
+ /*
+ * The out_root_unlock label sits inside the if body on purpose: the
+ * jump above is only taken with the root locked, and it lands directly
+ * on the unlock without re-evaluating the condition.
+ */
+ if (target_subsys != caller_subsys)
+out_root_unlock:
+ /*
+ * We were called from subsystem other than our target so we
+ * took some locks so now it's time to release them
+ */
+ inode_unlock(d_inode(root->cg_item.ci_dentry));
+
+ return ret;
+}
+EXPORT_SYMBOL(configfs_depend_item_unlocked);
+
static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
int ret = 0;
@@ -1453,7 +1561,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
down_write(&configfs_rename_sem);
parent = item->parent->dentry;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
if (!IS_ERR(new_dentry)) {
@@ -1469,7 +1577,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
error = -EEXIST;
dput(new_dentry);
}
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
up_write(&configfs_rename_sem);
return error;
@@ -1482,7 +1590,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
struct configfs_dirent * parent_sd = dentry->d_fsdata;
int err;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
/*
* Fake invisibility if dir belongs to a group/default groups hierarchy
* being attached
@@ -1495,7 +1603,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
else
err = 0;
}
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
return err;
}
@@ -1505,11 +1613,11 @@ static int configfs_dir_close(struct inode *inode, struct file *file)
struct dentry * dentry = file->f_path.dentry;
struct configfs_dirent * cursor = file->private_data;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
spin_lock(&configfs_dirent_lock);
list_del_init(&cursor->s_sibling);
spin_unlock(&configfs_dirent_lock);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
release_configfs_dirent(cursor);
@@ -1590,7 +1698,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
{
struct dentry * dentry = file->f_path.dentry;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
switch (whence) {
case 1:
offset += file->f_pos;
@@ -1598,7 +1706,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
if (offset >= 0)
break;
default:
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
return -EINVAL;
}
if (offset != file->f_pos) {
@@ -1624,7 +1732,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
spin_unlock(&configfs_dirent_lock);
}
}
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
return offset;
}
@@ -1659,14 +1767,14 @@ int configfs_register_group(struct config_group *parent_group,
parent = parent_group->cg_item.ci_dentry;
- mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
ret = create_default_group(parent_group, group);
if (!ret) {
spin_lock(&configfs_dirent_lock);
configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata);
spin_unlock(&configfs_dirent_lock);
}
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return ret;
}
EXPORT_SYMBOL(configfs_register_group);
@@ -1683,7 +1791,7 @@ void configfs_unregister_group(struct config_group *group)
struct dentry *dentry = group->cg_item.ci_dentry;
struct dentry *parent = group->cg_item.ci_parent->ci_dentry;
- mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
spin_lock(&configfs_dirent_lock);
configfs_detach_prep(dentry, NULL);
spin_unlock(&configfs_dirent_lock);
@@ -1692,7 +1800,7 @@ void configfs_unregister_group(struct config_group *group)
d_inode(dentry)->i_flags |= S_DEAD;
dont_mount(dentry);
d_delete(dentry);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
dput(dentry);
@@ -1764,7 +1872,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
sd = root->d_fsdata;
link_group(to_config_group(sd->s_element), group);
- mutex_lock_nested(&d_inode(root)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(root), I_MUTEX_PARENT);
err = -ENOMEM;
dentry = d_alloc_name(root, group->cg_item.ci_name);
@@ -1784,7 +1892,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
}
}
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
if (err) {
unlink_group(group);
@@ -1805,9 +1913,9 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
return;
}
- mutex_lock_nested(&d_inode(root)->i_mutex,
+ inode_lock_nested(d_inode(root),
I_MUTEX_PARENT);
- mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
mutex_lock(&configfs_symlink_mutex);
spin_lock(&configfs_dirent_lock);
if (configfs_detach_prep(dentry, NULL)) {
@@ -1818,11 +1926,11 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
configfs_detach_group(&group->cg_item);
d_inode(dentry)->i_flags |= S_DEAD;
dont_mount(dentry);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
d_delete(dentry);
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
dput(dentry);
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index d39099ea7..33b7ee34e 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -28,6 +28,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/mutex.h>
+#include <linux/vmalloc.h>
#include <asm/uaccess.h>
#include <linux/configfs.h>
@@ -48,6 +49,10 @@ struct configfs_buffer {
struct configfs_item_operations * ops;
struct mutex mutex;
int needs_read_fill;
+ bool read_in_progress;
+ bool write_in_progress;
+ char *bin_buffer;
+ int bin_buffer_size;
};
@@ -123,6 +128,87 @@ out:
return retval;
}
+/**
+ * configfs_read_bin_file - read a binary attribute.
+ * @file: file pointer.
+ * @buf: buffer to fill.
+ * @count: number of bytes to read.
+ * @ppos: starting offset in file.
+ *
+ * Userspace wants to read a binary attribute file. The attribute
+ * descriptor is in the file's ->d_fsdata. The target item is in the
+ * directory's ->d_fsdata.
+ *
+ * We check whether we need to refill the buffer. If so we will
+ * call the attribute's ->read() twice. The first time we
+ * will pass a NULL as a buffer pointer, which the attribute's method
+ * will use to return the size of the buffer required. If no error
+ * occurs we will allocate the buffer using vmalloc and call
+ * ->read() again passing that buffer as an argument.
+ * Then we just copy to user-space using simple_read_from_buffer.
+ *
+ * Return: the result of simple_read_from_buffer(), or -ETXTBSY if the
+ * file has already been written through this open, -EFBIG if the size
+ * reported by ->read() exceeds a non-zero cb_max_size, -ENOMEM if the
+ * buffer cannot be allocated, or the value returned by ->read() when the
+ * size query is <= 0 or the fill fails.
+ */
+
+static ssize_t
+configfs_read_bin_file(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct configfs_buffer *buffer = file->private_data;
+ struct dentry *dentry = file->f_path.dentry;
+ struct config_item *item = to_item(dentry->d_parent);
+ struct configfs_bin_attribute *bin_attr = to_bin_attr(dentry);
+ ssize_t retval = 0;
+ /* NOTE(review): this initial value is overwritten before it is read */
+ ssize_t len = min_t(size_t, count, PAGE_SIZE);
+
+ mutex_lock(&buffer->mutex);
+
+ /* we don't support switching read/write modes */
+ if (buffer->write_in_progress) {
+ retval = -ETXTBSY;
+ goto out;
+ }
+ buffer->read_in_progress = 1;
+
+ if (buffer->needs_read_fill) {
+ /* perform first read with buf == NULL to get extent */
+ len = bin_attr->read(item, NULL, 0);
+ if (len <= 0) {
+ retval = len;
+ goto out;
+ }
+
+ /* do not exceed the maximum value */
+ if (bin_attr->cb_max_size && len > bin_attr->cb_max_size) {
+ retval = -EFBIG;
+ goto out;
+ }
+
+ buffer->bin_buffer = vmalloc(len);
+ if (buffer->bin_buffer == NULL) {
+ retval = -ENOMEM;
+ goto out;
+ }
+ /*
+ * NOTE(review): the size recorded here comes from the size query;
+ * it is not updated if the fill below returns fewer bytes.
+ */
+ buffer->bin_buffer_size = len;
+
+ /* perform second read to fill buffer */
+ len = bin_attr->read(item, buffer->bin_buffer, len);
+ if (len < 0) {
+ retval = len;
+ vfree(buffer->bin_buffer);
+ buffer->bin_buffer_size = 0;
+ buffer->bin_buffer = NULL;
+ goto out;
+ }
+
+ /* buffer is now valid; later reads on this open reuse it */
+ buffer->needs_read_fill = 0;
+ }
+
+ retval = simple_read_from_buffer(buf, count, ppos, buffer->bin_buffer,
+ buffer->bin_buffer_size);
+out:
+ mutex_unlock(&buffer->mutex);
+ return retval;
+}
+
/**
* fill_write_buffer - copy buffer from userspace.
@@ -209,10 +295,80 @@ configfs_write_file(struct file *file, const char __user *buf, size_t count, lof
return len;
}
-static int check_perm(struct inode * inode, struct file * file)
+/**
+ * configfs_write_bin_file - write a binary attribute.
+ * @file: file pointer
+ * @buf: data to write
+ * @count: number of bytes
+ * @ppos: starting offset
+ *
+ * Writes to a binary attribute file are only buffered here.
+ * We buffer the consecutive writes (binary attribute files do not
+ * support lseek) in a continuously growing buffer, but we don't
+ * commit until the close of the file.
+ *
+ * Return: the number of bytes buffered, -ETXTBSY if the file has already
+ * been read through this open, -EFBIG if the write would grow the buffer
+ * past a non-zero cb_max_size, -ENOMEM if the buffer cannot be grown, or
+ * the error returned by simple_write_to_buffer().
+ */
+
+static ssize_t
+configfs_write_bin_file(struct file *file, const char __user *buf,
+			size_t count, loff_t *ppos)
+{
+	struct configfs_buffer *buffer = file->private_data;
+	struct dentry *dentry = file->f_path.dentry;
+	struct configfs_bin_attribute *bin_attr = to_bin_attr(dentry);
+	void *tbuf = NULL;
+	ssize_t len;
+
+	mutex_lock(&buffer->mutex);
+
+	/* we don't support switching read/write modes */
+	if (buffer->read_in_progress) {
+		len = -ETXTBSY;
+		goto out;
+	}
+	buffer->write_in_progress = 1;
+
+	/* buffer grows? */
+	if (*ppos + count > buffer->bin_buffer_size) {
+
+		/* do not exceed the maximum value: bail out before growing */
+		if (bin_attr->cb_max_size &&
+			*ppos + count > bin_attr->cb_max_size) {
+			len = -EFBIG;
+			goto out;
+		}
+
+		tbuf = vmalloc(*ppos + count);
+		if (tbuf == NULL) {
+			len = -ENOMEM;
+			goto out;
+		}
+
+		/* copy old contents */
+		if (buffer->bin_buffer) {
+			memcpy(tbuf, buffer->bin_buffer,
+				buffer->bin_buffer_size);
+			vfree(buffer->bin_buffer);
+		}
+
+		/* clear the new area */
+		memset(tbuf + buffer->bin_buffer_size, 0,
+			*ppos + count - buffer->bin_buffer_size);
+		buffer->bin_buffer = tbuf;
+		buffer->bin_buffer_size = *ppos + count;
+	}
+
+	/*
+	 * simple_write_to_buffer() advances *ppos itself on success, so the
+	 * offset must not be incremented a second time here.
+	 */
+	len = simple_write_to_buffer(buffer->bin_buffer,
+			buffer->bin_buffer_size, ppos, buf, count);
+out:
+	mutex_unlock(&buffer->mutex);
+	return len;
+}
+
+static int check_perm(struct inode * inode, struct file * file, int type)
{
struct config_item *item = configfs_get_config_item(file->f_path.dentry->d_parent);
struct configfs_attribute * attr = to_attr(file->f_path.dentry);
+ struct configfs_bin_attribute *bin_attr = NULL;
struct configfs_buffer * buffer;
struct configfs_item_operations * ops = NULL;
int error = 0;
@@ -220,6 +376,9 @@ static int check_perm(struct inode * inode, struct file * file)
if (!item || !attr)
goto Einval;
+ if (type & CONFIGFS_ITEM_BIN_ATTR)
+ bin_attr = to_bin_attr(file->f_path.dentry);
+
/* Grab the module reference for this attribute if we have one */
if (!try_module_get(attr->ca_owner)) {
error = -ENODEV;
@@ -236,9 +395,14 @@ static int check_perm(struct inode * inode, struct file * file)
* and we must have a store method.
*/
if (file->f_mode & FMODE_WRITE) {
- if (!(inode->i_mode & S_IWUGO) || !attr->store)
+ if (!(inode->i_mode & S_IWUGO))
+ goto Eaccess;
+
+ if ((type & CONFIGFS_ITEM_ATTR) && !attr->store)
goto Eaccess;
+ if ((type & CONFIGFS_ITEM_BIN_ATTR) && !bin_attr->write)
+ goto Eaccess;
}
/* File needs read support.
@@ -246,7 +410,13 @@ static int check_perm(struct inode * inode, struct file * file)
* must be a show method for it.
*/
if (file->f_mode & FMODE_READ) {
- if (!(inode->i_mode & S_IRUGO) || !attr->show)
+ if (!(inode->i_mode & S_IRUGO))
+ goto Eaccess;
+
+ if ((type & CONFIGFS_ITEM_ATTR) && !attr->show)
+ goto Eaccess;
+
+ if ((type & CONFIGFS_ITEM_BIN_ATTR) && !bin_attr->read)
goto Eaccess;
}
@@ -260,6 +430,8 @@ static int check_perm(struct inode * inode, struct file * file)
}
mutex_init(&buffer->mutex);
buffer->needs_read_fill = 1;
+ buffer->read_in_progress = 0;
+ buffer->write_in_progress = 0;
buffer->ops = ops;
file->private_data = buffer;
goto Done;
@@ -277,12 +449,7 @@ static int check_perm(struct inode * inode, struct file * file)
return error;
}
-static int configfs_open_file(struct inode * inode, struct file * filp)
-{
- return check_perm(inode,filp);
-}
-
-static int configfs_release(struct inode * inode, struct file * filp)
+static int configfs_release(struct inode *inode, struct file *filp)
{
struct config_item * item = to_item(filp->f_path.dentry->d_parent);
struct configfs_attribute * attr = to_attr(filp->f_path.dentry);
@@ -303,6 +470,47 @@ static int configfs_release(struct inode * inode, struct file * filp)
return 0;
}
+/* ->open for regular attribute files: check_perm() with CONFIGFS_ITEM_ATTR. */
+static int configfs_open_file(struct inode *inode, struct file *filp)
+{
+ return check_perm(inode, filp, CONFIGFS_ITEM_ATTR);
+}
+
+/* ->open for binary attribute files: check_perm() with CONFIGFS_ITEM_BIN_ATTR. */
+static int configfs_open_bin_file(struct inode *inode, struct file *filp)
+{
+ return check_perm(inode, filp, CONFIGFS_ITEM_BIN_ATTR);
+}
+
+/*
+ * ->release for binary attribute files.
+ *
+ * If data was buffered by configfs_write_bin_file() it is committed here
+ * through the attribute's ->write() method.  The vmalloc'ed bin_buffer is
+ * then freed whether it was filled by the read or by the write path, and
+ * the rest of the teardown is left to configfs_release().
+ *
+ * Returns the ->write() error if the commit failed, otherwise the result
+ * of configfs_release().
+ */
+static int configfs_release_bin_file(struct inode *inode, struct file *filp)
+{
+	struct configfs_buffer *buffer = filp->private_data;
+	struct dentry *dentry = filp->f_path.dentry;
+	struct config_item *item = to_item(dentry->d_parent);
+	struct configfs_bin_attribute *bin_attr = to_bin_attr(dentry);
+	ssize_t len = 0;
+	int ret;
+
+	buffer->read_in_progress = 0;
+
+	if (buffer->write_in_progress) {
+		buffer->write_in_progress = 0;
+
+		len = bin_attr->write(item, buffer->bin_buffer,
+				buffer->bin_buffer_size);
+	}
+
+	/*
+	 * Free the buffer on every release, not only after a write:
+	 * configfs_read_bin_file() allocates it as well and nothing else
+	 * frees it.  vfree on NULL is safe; like the write path, this
+	 * relies on bin_buffer starting out NULL.
+	 */
+	vfree(buffer->bin_buffer);
+	buffer->bin_buffer = NULL;
+	buffer->bin_buffer_size = 0;
+	buffer->needs_read_fill = 1;
+
+	/* must come last: the buffer is not touched after this call */
+	ret = configfs_release(inode, filp);
+	if (len < 0)
+		return len;
+	return ret;
+}
+
+
const struct file_operations configfs_file_operations = {
.read = configfs_read_file,
.write = configfs_write_file,
@@ -311,6 +519,14 @@ const struct file_operations configfs_file_operations = {
.release = configfs_release,
};
+/*
+ * File operations for binary attribute files; installed as i_fop by
+ * configfs_init_bin_file().
+ */
+const struct file_operations configfs_bin_file_operations = {
+ .read = configfs_read_bin_file,
+ .write = configfs_write_bin_file,
+ .llseek = NULL, /* bin file is not seekable */
+ .open = configfs_open_bin_file,
+ .release = configfs_release_bin_file,
+};
+
/**
* configfs_create_file - create an attribute file for an item.
* @item: item we're creating for.
@@ -324,11 +540,32 @@ int configfs_create_file(struct config_item * item, const struct configfs_attrib
umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG;
int error = 0;
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_NORMAL);
+ inode_lock_nested(d_inode(dir), I_MUTEX_NORMAL);
error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode,
CONFIGFS_ITEM_ATTR);
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
return error;
}
+/**
+ * configfs_create_bin_file - create a binary attribute file for an item.
+ * @item: item we're creating for.
+ * @bin_attr: binary attribute descriptor.
+ *
+ * Return: the result of configfs_make_dirent().
+ */
+
+int configfs_create_bin_file(struct config_item *item,
+		const struct configfs_bin_attribute *bin_attr)
+{
+	struct dentry *dir = item->ci_dentry;
+	struct configfs_dirent *parent_sd = dir->d_fsdata;
+	umode_t mode = (bin_attr->cb_attr.ca_mode & S_IALLUGO) | S_IFREG;
+	int error = 0;
+
+	/* use the d_inode() accessor, as configfs_create_file() does */
+	inode_lock_nested(d_inode(dir), I_MUTEX_NORMAL);
+	error = configfs_make_dirent(parent_sd, NULL, (void *) bin_attr, mode,
+				     CONFIGFS_ITEM_BIN_ATTR);
+	inode_unlock(d_inode(dir));
+
+	return error;
+}
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index eae87575e..cee087d8f 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -218,7 +218,7 @@ const unsigned char * configfs_get_name(struct configfs_dirent *sd)
if (sd->s_type & (CONFIGFS_DIR | CONFIGFS_ITEM_LINK))
return sd->s_dentry->d_name.name;
- if (sd->s_type & CONFIGFS_ITEM_ATTR) {
+ if (sd->s_type & (CONFIGFS_ITEM_ATTR | CONFIGFS_ITEM_BIN_ATTR)) {
attr = sd->s_element;
return attr->ca_name;
}
@@ -255,7 +255,7 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
/* no inode means this hasn't been made visible yet */
return;
- mutex_lock(&d_inode(dir)->i_mutex);
+ inode_lock(d_inode(dir));
list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
if (!sd->s_element)
continue;
@@ -268,5 +268,5 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
break;
}
}
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
}
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index ec5c8325b..db6d69289 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -279,27 +279,33 @@ static int configfs_getlink(struct dentry *dentry, char * path)
}
-static const char *configfs_follow_link(struct dentry *dentry, void **cookie)
+/*
+ * Build the symlink body with configfs_getlink() into a zeroed PAGE_SIZE
+ * buffer.  On success the buffer is returned and kfree_link is registered
+ * on @done to free it; on failure the buffer is freed and an ERR_PTR() is
+ * returned.  A NULL @dentry is rejected with -ECHILD.
+ * NOTE(review): NULL dentry is presumably the RCU-walk case — confirm
+ * against the VFS ->get_link contract.
+ */
+static const char *configfs_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- unsigned long page = get_zeroed_page(GFP_KERNEL);
+ char *body;
int error;
- if (!page)
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
+ body = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!body)
return ERR_PTR(-ENOMEM);
- error = configfs_getlink(dentry, (char *)page);
+ error = configfs_getlink(dentry, body);
if (!error) {
- return *cookie = (void *)page;
+ set_delayed_call(done, kfree_link, body);
+ return body;
}
- free_page(page);
+ kfree(body);
return ERR_PTR(error);
}
const struct inode_operations configfs_symlink_inode_operations = {
- .follow_link = configfs_follow_link,
+ .get_link = configfs_get_link,
.readlink = generic_readlink,
- .put_link = free_page_put_link,
.setattr = configfs_setattr,
};
diff --git a/fs/coredump.c b/fs/coredump.c
index 1777331ee..9ea87e9fd 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -32,6 +32,7 @@
#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
#include <linux/compat.h>
+#include <linux/timekeeping.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -117,6 +118,26 @@ int cn_esc_printf(struct core_name *cn, const char *fmt, ...)
ret = cn_vprintf(cn, fmt, arg);
va_end(arg);
+ if (ret == 0) {
+ /*
+ * Ensure that this coredump name component can't cause the
+ * resulting corefile path to consist of a ".." or ".".
+ */
+ if ((cn->used - cur == 1 && cn->corename[cur] == '.') ||
+ (cn->used - cur == 2 && cn->corename[cur] == '.'
+ && cn->corename[cur+1] == '.'))
+ cn->corename[cur] = '!';
+
+ /*
+ * Empty names are fishy and could be used to create a "//" in a
+ * corefile name, causing the coredump to happen one directory
+ * level too high. Enforce that all components of the core
+ * pattern are at least one character long.
+ */
+ if (cn->used == cur)
+ ret = cn_printf(cn, "!");
+ }
+
for (; cur < cn->used; ++cur) {
if (cn->corename[cur] == '/')
cn->corename[cur] = '!';
@@ -232,9 +253,10 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm)
break;
/* UNIX time of coredump */
case 't': {
- struct timeval tv;
- do_gettimeofday(&tv);
- err = cn_printf(cn, "%lu", tv.tv_sec);
+ time64_t time;
+
+ time = ktime_get_real_seconds();
+ err = cn_printf(cn, "%lld", time);
break;
}
/* hostname */
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 355c522f3..b862bc219 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -100,6 +100,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
break;
case S_IFLNK:
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_data.a_ops = &cramfs_aops;
break;
default:
diff --git a/fs/dax.c b/fs/dax.c
index 43671b682..bbb2ad783 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -24,57 +24,91 @@
#include <linux/memcontrol.h>
#include <linux/mm.h>
#include <linux/mutex.h>
+#include <linux/pagevec.h>
#include <linux/pmem.h>
#include <linux/sched.h>
#include <linux/uio.h>
#include <linux/vmstat.h>
+#include <linux/pfn_t.h>
+#include <linux/sizes.h>
+
+/*
+ * dax_map_atomic - take a request-queue reference and map a DAX range
+ * @bdev: block device whose queue is entered and which is passed to
+ *        bdev_direct_access()
+ * @dax: mapping request; ->addr is overwritten with ERR_PTR(errno) on failure
+ *
+ * Returns whatever bdev_direct_access() returned when that is non-negative,
+ * otherwise a negative errno.  On success the reference obtained through
+ * blk_queue_enter() is still held; on every failure path it has either been
+ * dropped again or was never taken.
+ */
+static long dax_map_atomic(struct block_device *bdev, struct blk_dax_ctl *dax)
+{
+	struct request_queue *queue = bdev->bd_queue;
+	long mapped;
+
+	/* Poison ->addr first so a failed blk_queue_enter() leaves an error. */
+	dax->addr = (void __pmem *) ERR_PTR(-EIO);
+	if (blk_queue_enter(queue, true))
+		return -EIO;
+
+	mapped = bdev_direct_access(bdev, dax);
+	if (mapped >= 0)
+		return mapped;
+
+	/* Mapping failed: record the errno in ->addr and drop the queue ref. */
+	dax->addr = (void __pmem *) ERR_PTR(mapped);
+	blk_queue_exit(queue);
+	return mapped;
+}
+
+/*
+ * dax_unmap_atomic - release the queue reference behind a DAX mapping
+ * @bdev: block device whose queue reference is dropped
+ * @dax: mapping request previously handed to dax_map_atomic()
+ *
+ * Does nothing when ->addr holds an ERR_PTR() value.
+ */
+static void dax_unmap_atomic(struct block_device *bdev,
+		const struct blk_dax_ctl *dax)
+{
+	if (!IS_ERR(dax->addr))
+		blk_queue_exit(bdev->bd_queue);
+}
+
+/*
+ * read_dax_sector - copy the page-sized DAX range containing sector @n
+ * @bdev: block device to read from
+ * @n: sector of interest; rounded down to a PAGE_SIZE / 512 boundary
+ *
+ * Allocates one page, maps PAGE_SIZE bytes of the device with
+ * dax_map_atomic() and copies them into the page.
+ *
+ * Returns the freshly allocated page on success, ERR_PTR(-ENOMEM) if the
+ * allocation fails, or ERR_PTR(errno) if the mapping fails.  The page is
+ * released again on the mapping-failure path so it is not leaked.
+ * NOTE(review): on success the caller presumably owns the page and must
+ * free it - confirm against callers.
+ */
+struct page *read_dax_sector(struct block_device *bdev, sector_t n)
+{
+	struct page *page = alloc_pages(GFP_KERNEL, 0);
+	struct blk_dax_ctl dax = {
+		.size = PAGE_SIZE,
+		.sector = n & ~((((int) PAGE_SIZE) / 512) - 1),
+	};
+	long rc;
+
+	if (!page)
+		return ERR_PTR(-ENOMEM);
+
+	rc = dax_map_atomic(bdev, &dax);
+	if (rc < 0) {
+		/* Nothing was copied; give the page back before bailing out. */
+		__free_page(page);
+		return ERR_PTR(rc);
+	}
+	memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE);
+	dax_unmap_atomic(bdev, &dax);
+	return page;
+}
/*
- * dax_clear_blocks() is called from within transaction context from XFS,
+ * dax_clear_sectors() is called from within transaction context from XFS,
* and hence this means the stack from this point must follow GFP_NOFS
* semantics for all operations.
*/
-int dax_clear_blocks(struct inode *inode, sector_t block, long size)
+int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size)
{
- struct block_device *bdev = inode->i_sb->s_bdev;
- sector_t sector = block << (inode->i_blkbits - 9);
+ struct blk_dax_ctl dax = {
+ .sector = _sector,
+ .size = _size,
+ };
might_sleep();
do {
- void __pmem *addr;
- unsigned long pfn;
- long count;
+ long count, sz;
- count = bdev_direct_access(bdev, sector, &addr, &pfn, size);
+ count = dax_map_atomic(bdev, &dax);
if (count < 0)
return count;
- BUG_ON(size < count);
- while (count > 0) {
- unsigned pgsz = PAGE_SIZE - offset_in_page(addr);
- if (pgsz > count)
- pgsz = count;
- clear_pmem(addr, pgsz);
- addr += pgsz;
- size -= pgsz;
- count -= pgsz;
- BUG_ON(pgsz & 511);
- sector += pgsz / 512;
- cond_resched();
- }
- } while (size);
+ sz = min_t(long, count, SZ_128K);
+ clear_pmem(dax.addr, sz);
+ dax.size -= sz;
+ dax.sector += sz / 512;
+ dax_unmap_atomic(bdev, &dax);
+ cond_resched();
+ } while (dax.size);
wmb_pmem();
return 0;
}
-EXPORT_SYMBOL_GPL(dax_clear_blocks);
-
-static long dax_get_addr(struct buffer_head *bh, void __pmem **addr,
- unsigned blkbits)
-{
- unsigned long pfn;
- sector_t sector = bh->b_blocknr << (blkbits - 9);
- return bdev_direct_access(bh->b_bdev, sector, addr, &pfn, bh->b_size);
-}
+EXPORT_SYMBOL_GPL(dax_clear_sectors);
/* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */
static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first,
@@ -105,19 +139,29 @@ static bool buffer_size_valid(struct buffer_head *bh)
return bh->b_state != 0;
}
+
+/*
+ * Convert the block number stored in @bh into a 512-byte sector number,
+ * using the block size (i_blkbits) of @inode.
+ */
+static sector_t to_sector(const struct buffer_head *bh,
+		const struct inode *inode)
+{
+	return bh->b_blocknr << (inode->i_blkbits - 9);
+}
+
static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
loff_t start, loff_t end, get_block_t get_block,
struct buffer_head *bh)
{
- ssize_t retval = 0;
- loff_t pos = start;
- loff_t max = start;
- loff_t bh_max = start;
- void __pmem *addr;
- bool hole = false;
- bool need_wmb = false;
-
- if (iov_iter_rw(iter) != WRITE)
+ loff_t pos = start, max = start, bh_max = start;
+ bool hole = false, need_wmb = false;
+ struct block_device *bdev = NULL;
+ int rw = iov_iter_rw(iter), rc;
+ long map_len = 0;
+ struct blk_dax_ctl dax = {
+ .addr = (void __pmem *) ERR_PTR(-EIO),
+ };
+
+ if (rw == READ)
end = min(end, i_size_read(inode));
while (pos < end) {
@@ -132,13 +176,13 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
if (pos == bh_max) {
bh->b_size = PAGE_ALIGN(end - pos);
bh->b_state = 0;
- retval = get_block(inode, block, bh,
- iov_iter_rw(iter) == WRITE);
- if (retval)
+ rc = get_block(inode, block, bh, rw == WRITE);
+ if (rc)
break;
if (!buffer_size_valid(bh))
bh->b_size = 1 << blkbits;
bh_max = pos - first + bh->b_size;
+ bdev = bh->b_bdev;
} else {
unsigned done = bh->b_size -
(bh_max - (pos - first));
@@ -146,47 +190,53 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
bh->b_size -= done;
}
- hole = iov_iter_rw(iter) != WRITE && !buffer_written(bh);
+ hole = rw == READ && !buffer_written(bh);
if (hole) {
- addr = NULL;
size = bh->b_size - first;
} else {
- retval = dax_get_addr(bh, &addr, blkbits);
- if (retval < 0)
+ dax_unmap_atomic(bdev, &dax);
+ dax.sector = to_sector(bh, inode);
+ dax.size = bh->b_size;
+ map_len = dax_map_atomic(bdev, &dax);
+ if (map_len < 0) {
+ rc = map_len;
break;
+ }
if (buffer_unwritten(bh) || buffer_new(bh)) {
- dax_new_buf(addr, retval, first, pos,
- end);
+ dax_new_buf(dax.addr, map_len, first,
+ pos, end);
need_wmb = true;
}
- addr += first;
- size = retval - first;
+ dax.addr += first;
+ size = map_len - first;
}
max = min(pos + size, end);
}
if (iov_iter_rw(iter) == WRITE) {
- len = copy_from_iter_pmem(addr, max - pos, iter);
+ len = copy_from_iter_pmem(dax.addr, max - pos, iter);
need_wmb = true;
} else if (!hole)
- len = copy_to_iter((void __force *)addr, max - pos,
+ len = copy_to_iter((void __force *) dax.addr, max - pos,
iter);
else
len = iov_iter_zero(max - pos, iter);
if (!len) {
- retval = -EFAULT;
+ rc = -EFAULT;
break;
}
pos += len;
- addr += len;
+ if (!IS_ERR(dax.addr))
+ dax.addr += len;
}
if (need_wmb)
wmb_pmem();
+ dax_unmap_atomic(bdev, &dax);
- return (pos == start) ? retval : pos - start;
+ return (pos == start) ? rc : pos - start;
}
/**
@@ -215,13 +265,14 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
loff_t end = pos + iov_iter_count(iter);
memset(&bh, 0, sizeof(bh));
+ bh.b_bdev = inode->i_sb->s_bdev;
if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) {
struct address_space *mapping = inode->i_mapping;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
retval = filemap_write_and_wait_range(mapping, pos, end - 1);
if (retval) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
goto out;
}
}
@@ -233,7 +284,7 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
retval = dax_io(inode, iter, pos, end, get_block, &bh);
if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if ((retval > 0) && end_io)
end_io(iocb, pos, retval, bh.b_private);
@@ -275,28 +326,231 @@ static int dax_load_hole(struct address_space *mapping, struct page *page,
return VM_FAULT_LOCKED;
}
-static int copy_user_bh(struct page *to, struct buffer_head *bh,
- unsigned blkbits, unsigned long vaddr)
+static int copy_user_bh(struct page *to, struct inode *inode,
+ struct buffer_head *bh, unsigned long vaddr)
{
- void __pmem *vfrom;
+ struct blk_dax_ctl dax = {
+ .sector = to_sector(bh, inode),
+ .size = bh->b_size,
+ };
+ struct block_device *bdev = bh->b_bdev;
void *vto;
- if (dax_get_addr(bh, &vfrom, blkbits) < 0)
- return -EIO;
+ if (dax_map_atomic(bdev, &dax) < 0)
+ return PTR_ERR(dax.addr);
vto = kmap_atomic(to);
- copy_user_page(vto, (void __force *)vfrom, vaddr, to);
+ copy_user_page(vto, (void __force *)dax.addr, vaddr, to);
kunmap_atomic(vto);
+ dax_unmap_atomic(bdev, &dax);
return 0;
}
+/* Sector value meaning "do not insert a new radix tree entry". */
+#define NO_SECTOR (-1)
+/* Mask a page index with PMD_MASK scaled down to page-index units. */
+#define DAX_PMD_INDEX(page_index) ((page_index) & (PMD_MASK >> PAGE_CACHE_SHIFT))
+
+/*
+ * dax_radix_entry - look up, insert or dirty the DAX radix tree entry
+ * @mapping: address space whose page_tree is examined and updated
+ * @index: page index the entry is for
+ * @sector: sector stored in a newly inserted entry, or NO_SECTOR to skip
+ *          insertion when no entry exists
+ * @pmd_entry: true to insert a PMD-type entry
+ * @dirty: true to mark the inode dirty and tag the entry PAGECACHE_TAG_DIRTY
+ *
+ * Returns 0 on success, -EIO when an existing entry has an unexpected type,
+ * or the error from radix_tree_insert().  All tree lookups and updates are
+ * done with mapping->tree_lock held.
+ */
+static int dax_radix_entry(struct address_space *mapping, pgoff_t index,
+ sector_t sector, bool pmd_entry, bool dirty)
+{
+ struct radix_tree_root *page_tree = &mapping->page_tree;
+ pgoff_t pmd_index = DAX_PMD_INDEX(index);
+ int type, error = 0;
+ void *entry;
+
+ WARN_ON_ONCE(pmd_entry && !dirty);
+ if (dirty)
+ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+
+ spin_lock_irq(&mapping->tree_lock);
+
+ /* A PMD entry already present at pmd_index is reused as is. */
+ entry = radix_tree_lookup(page_tree, pmd_index);
+ if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD) {
+ index = pmd_index;
+ goto dirty;
+ }
+
+ entry = radix_tree_lookup(page_tree, index);
+ if (entry) {
+ type = RADIX_DAX_TYPE(entry);
+ if (WARN_ON_ONCE(type != RADIX_DAX_PTE &&
+ type != RADIX_DAX_PMD)) {
+ error = -EIO;
+ goto unlock;
+ }
+
+ /* Existing entry is good enough unless a PTE must become a PMD. */
+ if (!pmd_entry || type == RADIX_DAX_PMD)
+ goto dirty;
+
+ /*
+ * We only insert dirty PMD entries into the radix tree. This
+ * means we don't need to worry about removing a dirty PTE
+ * entry and inserting a clean PMD entry, thus reducing the
+ * range we would flush with a follow-up fsync/msync call.
+ */
+ radix_tree_delete(&mapping->page_tree, index);
+ mapping->nrexceptional--;
+ }
+
+ if (sector == NO_SECTOR) {
+ /*
+ * This can happen during correct operation if our pfn_mkwrite
+ * fault raced against a hole punch operation. If this
+ * happens the pte that was hole punched will have been
+ * unmapped and the radix tree entry will have been removed by
+ * the time we are called, but the call will still happen. We
+ * will return all the way up to wp_pfn_shared(), where the
+ * pte_same() check will fail, eventually causing page fault
+ * to be retried by the CPU.
+ */
+ goto unlock;
+ }
+
+ error = radix_tree_insert(page_tree, index,
+ RADIX_DAX_ENTRY(sector, pmd_entry));
+ if (error)
+ goto unlock;
+
+ mapping->nrexceptional++;
+ dirty:
+ /* Reached for both reused and freshly inserted entries. */
+ if (dirty)
+ radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
+ unlock:
+ spin_unlock_irq(&mapping->tree_lock);
+ return error;
+}
+
+/*
+ * dax_writeback_one - flush the pmem range described by one radix tree entry
+ * @bdev: block device handed to dax_map_atomic()
+ * @mapping: address space that owns the radix tree
+ * @index: radix tree index of @entry
+ * @entry: entry value the caller looked up earlier
+ *
+ * Returns a negative errno on failure.  The early exits taken under
+ * tree_lock return 0, except for an entry of unexpected type, which yields
+ * -EIO.  After a successful flush the non-negative value returned by
+ * dax_map_atomic() is passed back unchanged, so callers should test for
+ * "< 0" rather than "!= 0".
+ */
+static int dax_writeback_one(struct block_device *bdev,
+ struct address_space *mapping, pgoff_t index, void *entry)
+{
+ struct radix_tree_root *page_tree = &mapping->page_tree;
+ int type = RADIX_DAX_TYPE(entry);
+ struct radix_tree_node *node;
+ struct blk_dax_ctl dax;
+ void **slot;
+ int ret = 0;
+
+ spin_lock_irq(&mapping->tree_lock);
+ /*
+ * Regular page slots are stabilized by the page lock even
+ * without the tree itself locked. These unlocked entries
+ * need verification under the tree lock.
+ */
+ if (!__radix_tree_lookup(page_tree, index, &node, &slot))
+ goto unlock;
+ if (*slot != entry)
+ goto unlock;
+
+ /* another fsync thread may have already written back this entry */
+ if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
+ goto unlock;
+
+ if (WARN_ON_ONCE(type != RADIX_DAX_PTE && type != RADIX_DAX_PMD)) {
+ ret = -EIO;
+ goto unlock;
+ }
+
+ /* Capture what is needed from the entry before dropping the lock. */
+ dax.sector = RADIX_DAX_SECTOR(entry);
+ dax.size = (type == RADIX_DAX_PMD ? PMD_SIZE : PAGE_SIZE);
+ spin_unlock_irq(&mapping->tree_lock);
+
+ /*
+ * We cannot hold tree_lock while calling dax_map_atomic() because it
+ * eventually calls cond_resched().
+ */
+ ret = dax_map_atomic(bdev, &dax);
+ if (ret < 0)
+ return ret;
+
+ /* A mapping shorter than the entry's size cannot be flushed in full. */
+ if (WARN_ON_ONCE(ret < dax.size)) {
+ ret = -EIO;
+ goto unmap;
+ }
+
+ wb_cache_pmem(dax.addr, dax.size);
+
+ /* The TOWRITE tag is cleared only after the flush has been issued. */
+ spin_lock_irq(&mapping->tree_lock);
+ radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
+ spin_unlock_irq(&mapping->tree_lock);
+ unmap:
+ dax_unmap_atomic(bdev, &dax);
+ return ret;
+
+ unlock:
+ spin_unlock_irq(&mapping->tree_lock);
+ return ret;
+}
+
+/*
+ * Flush the mapping to the persistent domain within the byte range of [start,
+ * end]. This is required by data integrity operations to ensure file data is
+ * on persistent storage prior to completion of the operation.
+ *
+ * @mapping: address space whose tagged DAX entries are written back
+ * @bdev: block device handed to dax_writeback_one()
+ * @wbc: supplies range_start, range_end and sync_mode
+ *
+ * Returns 0 when there is nothing to do (no exceptional entries, or
+ * sync_mode is not WB_SYNC_ALL) or when all entries were flushed, -EIO if
+ * the inode block size differs from PAGE_SHIFT, or the first negative value
+ * returned by dax_writeback_one().
+ */
+int dax_writeback_mapping_range(struct address_space *mapping,
+		struct block_device *bdev, struct writeback_control *wbc)
+{
+	struct inode *inode = mapping->host;
+	pgoff_t start_index, end_index, pmd_index;
+	pgoff_t indices[PAGEVEC_SIZE];
+	struct pagevec pvec;
+	bool done = false;
+	int i, ret = 0;
+	void *entry;
+
+	if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
+		return -EIO;
+
+	if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
+		return 0;
+
+	start_index = wbc->range_start >> PAGE_CACHE_SHIFT;
+	end_index = wbc->range_end >> PAGE_CACHE_SHIFT;
+	pmd_index = DAX_PMD_INDEX(start_index);
+
+	rcu_read_lock();
+	entry = radix_tree_lookup(&mapping->page_tree, pmd_index);
+	rcu_read_unlock();
+
+	/* see if the start of our range is covered by a PMD entry */
+	if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
+		start_index = pmd_index;
+
+	tag_pages_for_writeback(mapping, start_index, end_index);
+
+	pagevec_init(&pvec, 0);
+	while (!done) {
+		pvec.nr = find_get_entries_tag(mapping, start_index,
+				PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE,
+				pvec.pages, indices);
+
+		if (pvec.nr == 0)
+			break;
+
+		for (i = 0; i < pvec.nr; i++) {
+			if (indices[i] > end_index) {
+				done = true;
+				break;
+			}
+
+			ret = dax_writeback_one(bdev, mapping, indices[i],
+					pvec.pages[i]);
+			if (ret < 0)
+				return ret;
+		}
+
+		/*
+		 * Resume after the last entry of this batch.  Without this
+		 * every pass rescans from the range start and the loop only
+		 * ends once dax_writeback_one() has cleared every TOWRITE
+		 * tag; an entry it skipped would be returned again.
+		 */
+		start_index = indices[pvec.nr - 1] + 1;
+		if (!start_index)
+			break;	/* index wrapped: nothing left to scan */
+	}
+	wmb_pmem();
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
+
static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
- struct address_space *mapping = inode->i_mapping;
- sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9);
unsigned long vaddr = (unsigned long)vmf->virtual_address;
- void __pmem *addr;
- unsigned long pfn;
+ struct address_space *mapping = inode->i_mapping;
+ struct block_device *bdev = bh->b_bdev;
+ struct blk_dax_ctl dax = {
+ .sector = to_sector(bh, inode),
+ .size = bh->b_size,
+ };
pgoff_t size;
int error;
@@ -315,20 +569,23 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
goto out;
}
- error = bdev_direct_access(bh->b_bdev, sector, &addr, &pfn, bh->b_size);
- if (error < 0)
- goto out;
- if (error < PAGE_SIZE) {
- error = -EIO;
+ if (dax_map_atomic(bdev, &dax) < 0) {
+ error = PTR_ERR(dax.addr);
goto out;
}
if (buffer_unwritten(bh) || buffer_new(bh)) {
- clear_pmem(addr, PAGE_SIZE);
+ clear_pmem(dax.addr, PAGE_SIZE);
wmb_pmem();
}
+ dax_unmap_atomic(bdev, &dax);
- error = vm_insert_mixed(vma, vaddr, pfn);
+ error = dax_radix_entry(mapping, vmf->pgoff, dax.sector, false,
+ vmf->flags & FAULT_FLAG_WRITE);
+ if (error)
+ goto out;
+
+ error = vm_insert_mixed(vma, vaddr, dax.pfn);
out:
i_mmap_unlock_read(mapping);
@@ -373,6 +630,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
memset(&bh, 0, sizeof(bh));
block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
+ bh.b_bdev = inode->i_sb->s_bdev;
bh.b_size = PAGE_SIZE;
repeat:
@@ -422,7 +680,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
if (vmf->cow_page) {
struct page *new_page = vmf->cow_page;
if (buffer_written(&bh))
- error = copy_user_bh(new_page, &bh, blkbits, vaddr);
+ error = copy_user_bh(new_page, inode, &bh, vaddr);
else
clear_user_highpage(new_page, vaddr);
if (error)
@@ -452,6 +710,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
delete_from_page_cache(page);
unlock_page(page);
page_cache_release(page);
+ page = NULL;
}
/*
@@ -523,6 +782,24 @@ EXPORT_SYMBOL_GPL(dax_fault);
*/
#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
+/*
+ * __dax_dbg - emit a pr_debug() line explaining why a fault fell back
+ * @bh: buffer head describing the mapping, or NULL when none is available
+ * @address: faulting address, printed in hex
+ * @reason: short human-readable fallback reason
+ * @fn: label printed at the start of the message
+ *
+ * With a buffer head the device name, state, start block and length are
+ * included.  b_blocknr is cast to u64 and b_size is printed as a size, so
+ * the unsigned conversions %llu and %zu are used.
+ */
+static void __dax_dbg(struct buffer_head *bh, unsigned long address,
+		const char *reason, const char *fn)
+{
+	if (bh) {
+		char bname[BDEVNAME_SIZE];
+		bdevname(bh->b_bdev, bname);
+		pr_debug("%s: %s addr: %lx dev %s state %lx start %llu "
+			"length %zu fallback: %s\n", fn, current->comm,
+			address, bname, bh->b_state, (u64)bh->b_blocknr,
+			bh->b_size, reason);
+	} else {
+		pr_debug("%s: %s addr: %lx fallback: %s\n", fn,
+			current->comm, address, reason);
+	}
+}
+
+/* PMD fault path wrapper: tags every message with "dax_pmd". */
+#define dax_pmd_dbg(bh, address, reason) __dax_dbg(bh, address, reason, "dax_pmd")
+
int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmd, unsigned int flags, get_block_t get_block,
dax_iodone_t complete_unwritten)
@@ -534,61 +811,83 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
unsigned blkbits = inode->i_blkbits;
unsigned long pmd_addr = address & PMD_MASK;
bool write = flags & FAULT_FLAG_WRITE;
- long length;
- void __pmem *kaddr;
+ struct block_device *bdev;
pgoff_t size, pgoff;
- sector_t block, sector;
- unsigned long pfn;
- int result = 0;
+ sector_t block;
+ int error, result = 0;
+ bool alloc = false;
- /* dax pmd mappings are broken wrt gup and fork */
+ /* dax pmd mappings require pfn_t_devmap() */
if (!IS_ENABLED(CONFIG_FS_DAX_PMD))
return VM_FAULT_FALLBACK;
/* Fall back to PTEs if we're going to COW */
- if (write && !(vma->vm_flags & VM_SHARED))
+ if (write && !(vma->vm_flags & VM_SHARED)) {
+ split_huge_pmd(vma, pmd, address);
+ dax_pmd_dbg(NULL, address, "cow write");
return VM_FAULT_FALLBACK;
+ }
/* If the PMD would extend outside the VMA */
- if (pmd_addr < vma->vm_start)
+ if (pmd_addr < vma->vm_start) {
+ dax_pmd_dbg(NULL, address, "vma start unaligned");
return VM_FAULT_FALLBACK;
- if ((pmd_addr + PMD_SIZE) > vma->vm_end)
+ }
+ if ((pmd_addr + PMD_SIZE) > vma->vm_end) {
+ dax_pmd_dbg(NULL, address, "vma end unaligned");
return VM_FAULT_FALLBACK;
+ }
pgoff = linear_page_index(vma, pmd_addr);
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (pgoff >= size)
return VM_FAULT_SIGBUS;
/* If the PMD would cover blocks out of the file */
- if ((pgoff | PG_PMD_COLOUR) >= size)
+ if ((pgoff | PG_PMD_COLOUR) >= size) {
+ dax_pmd_dbg(NULL, address,
+ "offset + huge page size > file size");
return VM_FAULT_FALLBACK;
+ }
memset(&bh, 0, sizeof(bh));
+ bh.b_bdev = inode->i_sb->s_bdev;
block = (sector_t)pgoff << (PAGE_SHIFT - blkbits);
bh.b_size = PMD_SIZE;
- length = get_block(inode, block, &bh, write);
- if (length)
+
+ if (get_block(inode, block, &bh, 0) != 0)
return VM_FAULT_SIGBUS;
- i_mmap_lock_read(mapping);
+
+ if (!buffer_mapped(&bh) && write) {
+ if (get_block(inode, block, &bh, 1) != 0)
+ return VM_FAULT_SIGBUS;
+ alloc = true;
+ }
+
+ bdev = bh.b_bdev;
/*
* If the filesystem isn't willing to tell us the length of a hole,
* just fall back to PTEs. Calling get_block 512 times in a loop
* would be silly.
*/
- if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE)
- goto fallback;
+ if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE) {
+ dax_pmd_dbg(&bh, address, "allocated block too small");
+ return VM_FAULT_FALLBACK;
+ }
/*
* If we allocated new storage, make sure no process has any
* zero pages covering this hole
*/
- if (buffer_new(&bh)) {
- i_mmap_unlock_read(mapping);
- unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0);
- i_mmap_lock_read(mapping);
+ if (alloc) {
+ loff_t lstart = pgoff << PAGE_SHIFT;
+ loff_t lend = lstart + PMD_SIZE - 1; /* inclusive */
+
+ truncate_pagecache_range(inode, lstart, lend);
}
+ i_mmap_lock_read(mapping);
+
/*
* If a truncate happened while we were allocating blocks, we may
* leave blocks allocated to the file that are beyond EOF. We can't
@@ -600,57 +899,108 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
result = VM_FAULT_SIGBUS;
goto out;
}
- if ((pgoff | PG_PMD_COLOUR) >= size)
+ if ((pgoff | PG_PMD_COLOUR) >= size) {
+ dax_pmd_dbg(&bh, address,
+ "offset + huge page size > file size");
goto fallback;
+ }
if (!write && !buffer_mapped(&bh) && buffer_uptodate(&bh)) {
spinlock_t *ptl;
pmd_t entry;
struct page *zero_page = get_huge_zero_page();
- if (unlikely(!zero_page))
+ if (unlikely(!zero_page)) {
+ dax_pmd_dbg(&bh, address, "no zero page");
goto fallback;
+ }
ptl = pmd_lock(vma->vm_mm, pmd);
if (!pmd_none(*pmd)) {
spin_unlock(ptl);
+ dax_pmd_dbg(&bh, address, "pmd already present");
goto fallback;
}
+ dev_dbg(part_to_dev(bdev->bd_part),
+ "%s: %s addr: %lx pfn: <zero> sect: %llx\n",
+ __func__, current->comm, address,
+ (unsigned long long) to_sector(&bh, inode));
+
entry = mk_pmd(zero_page, vma->vm_page_prot);
entry = pmd_mkhuge(entry);
set_pmd_at(vma->vm_mm, pmd_addr, pmd, entry);
result = VM_FAULT_NOPAGE;
spin_unlock(ptl);
} else {
- sector = bh.b_blocknr << (blkbits - 9);
- length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn,
- bh.b_size);
+ struct blk_dax_ctl dax = {
+ .sector = to_sector(&bh, inode),
+ .size = PMD_SIZE,
+ };
+ long length = dax_map_atomic(bdev, &dax);
+
if (length < 0) {
result = VM_FAULT_SIGBUS;
goto out;
}
- if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
+ if (length < PMD_SIZE) {
+ dax_pmd_dbg(&bh, address, "dax-length too small");
+ dax_unmap_atomic(bdev, &dax);
goto fallback;
+ }
+ if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR) {
+ dax_pmd_dbg(&bh, address, "pfn unaligned");
+ dax_unmap_atomic(bdev, &dax);
+ goto fallback;
+ }
- /*
- * TODO: teach vmf_insert_pfn_pmd() to support
- * 'pte_special' for pmds
- */
- if (pfn_valid(pfn))
+ if (!pfn_t_devmap(dax.pfn)) {
+ dax_unmap_atomic(bdev, &dax);
+ dax_pmd_dbg(&bh, address, "pfn not in memmap");
goto fallback;
+ }
if (buffer_unwritten(&bh) || buffer_new(&bh)) {
- int i;
- for (i = 0; i < PTRS_PER_PMD; i++)
- clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE);
+ clear_pmem(dax.addr, PMD_SIZE);
wmb_pmem();
count_vm_event(PGMAJFAULT);
mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
result |= VM_FAULT_MAJOR;
}
+ dax_unmap_atomic(bdev, &dax);
- result |= vmf_insert_pfn_pmd(vma, address, pmd, pfn, write);
+ /*
+ * For PTE faults we insert a radix tree entry for reads, and
+ * leave it clean. Then on the first write we dirty the radix
+ * tree entry via the dax_pfn_mkwrite() path. This sequence
+ * allows the dax_pfn_mkwrite() call to be simpler and avoid a
+ * call into get_block() to translate the pgoff to a sector in
+ * order to be able to create a new radix tree entry.
+ *
+ * The PMD path doesn't have an equivalent to
+ * dax_pfn_mkwrite(), though, so for a read followed by a
+ * write we traverse all the way through __dax_pmd_fault()
+ * twice. This means we can just skip inserting a radix tree
+ * entry completely on the initial read and just wait until
+ * the write to insert a dirty entry.
+ */
+ if (write) {
+ error = dax_radix_entry(mapping, pgoff, dax.sector,
+ true, true);
+ if (error) {
+ dax_pmd_dbg(&bh, address,
+ "PMD radix insertion failed");
+ goto fallback;
+ }
+ }
+
+ dev_dbg(part_to_dev(bdev->bd_part),
+ "%s: %s addr: %lx pfn: %lx sect: %llx\n",
+ __func__, current->comm, address,
+ pfn_t_to_pfn(dax.pfn),
+ (unsigned long long) dax.sector);
+ result |= vmf_insert_pfn_pmd(vma, address, pmd,
+ dax.pfn, write);
}
out:
@@ -702,15 +1052,27 @@ EXPORT_SYMBOL_GPL(dax_pmd_fault);
* dax_pfn_mkwrite - handle first write to DAX page
* @vma: The virtual memory area where the fault occurred
* @vmf: The description of the fault
- *
*/
int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+ struct file *file = vma->vm_file;
+ int error;
+
+ /*
+ * We pass NO_SECTOR to dax_radix_entry() because we expect that a
+ * RADIX_DAX_PTE entry already exists in the radix tree from a
+ * previous call to __dax_fault(). We just want to look up that PTE
+ * entry using vmf->pgoff and make sure the dirty tag is set. This
+ * saves us from having to make a call to get_block() here to look
+ * up the sector.
+ */
+ error = dax_radix_entry(file->f_mapping, vmf->pgoff, NO_SECTOR, false,
+ true);
- sb_start_pagefault(sb);
- file_update_time(vma->vm_file);
- sb_end_pagefault(sb);
+ if (error == -ENOMEM)
+ return VM_FAULT_OOM;
+ if (error)
+ return VM_FAULT_SIGBUS;
return VM_FAULT_NOPAGE;
}
EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);
@@ -747,17 +1109,23 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
BUG_ON((offset + length) > PAGE_CACHE_SIZE);
memset(&bh, 0, sizeof(bh));
+ bh.b_bdev = inode->i_sb->s_bdev;
bh.b_size = PAGE_CACHE_SIZE;
err = get_block(inode, index, &bh, 0);
if (err < 0)
return err;
if (buffer_written(&bh)) {
- void __pmem *addr;
- err = dax_get_addr(&bh, &addr, inode->i_blkbits);
- if (err < 0)
- return err;
- clear_pmem(addr + offset, length);
+ struct block_device *bdev = bh.b_bdev;
+ struct blk_dax_ctl dax = {
+ .sector = to_sector(&bh, inode),
+ .size = PAGE_CACHE_SIZE,
+ };
+
+ if (dax_map_atomic(bdev, &dax) < 0)
+ return PTR_ERR(dax.addr);
+ clear_pmem(dax.addr + offset, length);
wmb_pmem();
+ dax_unmap_atomic(bdev, &dax);
}
return 0;
diff --git a/fs/dcache.c b/fs/dcache.c
index 927ed93af..2398f9f94 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1156,7 +1156,7 @@ enum d_walk_ret {
*
* The @enter() and @finish() callbacks are called with d_lock held.
*/
-void d_walk(struct dentry *parent, void *data,
+static void d_walk(struct dentry *parent, void *data,
enum d_walk_ret (*enter)(void *, struct dentry *),
void (*finish)(void *))
{
@@ -1261,7 +1261,6 @@ rename_retry:
seq = 1;
goto again;
}
-EXPORT_SYMBOL_GPL(d_walk);
/*
* Search for at least 1 mount point in the dentry's subdirs.
@@ -1561,7 +1560,8 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
if (name->len > DNAME_INLINE_LEN-1) {
size_t size = offsetof(struct external_name, name[1]);
- struct external_name *p = kmalloc(size + name->len, GFP_KERNEL);
+ struct external_name *p = kmalloc(size + name->len,
+ GFP_KERNEL_ACCOUNT);
if (!p) {
kmem_cache_free(dentry_cache, dentry);
return NULL;
@@ -1724,7 +1724,7 @@ static unsigned d_flags_for_inode(struct inode *inode)
}
if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
- if (unlikely(inode->i_op->follow_link)) {
+ if (unlikely(inode->i_op->get_link)) {
add_flags = DCACHE_SYMLINK_TYPE;
goto type_determined;
}
@@ -2452,7 +2452,7 @@ EXPORT_SYMBOL(d_rehash);
*/
void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
{
- BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
+ BUG_ON(!inode_is_locked(dentry->d_parent->d_inode));
BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
spin_lock(&dentry->d_lock);
@@ -2728,7 +2728,7 @@ static int __d_unalias(struct inode *inode,
if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
goto out_err;
m1 = &dentry->d_sb->s_vfs_rename_mutex;
- if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
+ if (!inode_trylock(alias->d_parent->d_inode))
goto out_err;
m2 = &alias->d_parent->d_inode->i_mutex;
out_unalias:
@@ -3294,18 +3294,18 @@ out:
* @new_dentry: new dentry
* @old_dentry: old dentry
*
- * Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
- * Returns 0 otherwise.
+ * Returns true if new_dentry is a subdirectory of the parent (at any depth).
+ * Returns false otherwise.
* Caller must ensure that "new_dentry" is pinned before calling is_subdir()
*/
-int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
+bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
{
- int result;
+ bool result;
unsigned seq;
if (new_dentry == old_dentry)
- return 1;
+ return true;
do {
/* for restarting inner loop in case of seq retry */
@@ -3316,9 +3316,9 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
*/
rcu_read_lock();
if (d_ancestor(old_dentry, new_dentry))
- result = 1;
+ result = true;
else
- result = 0;
+ result = false;
rcu_read_unlock();
} while (read_seqretry(&rename_lock, seq));
@@ -3406,7 +3406,7 @@ static void __init dcache_init(void)
* of the dcache.
*/
dentry_cache = KMEM_CACHE(dentry,
- SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
+ SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT);
/* Hash may have been set up in dcache_init_early */
if (!hashdist)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b7fcc0de0..bece948b3 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -265,7 +265,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
if (!parent)
parent = debugfs_mount->mnt_root;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(dentry) && d_really_is_positive(dentry)) {
dput(dentry);
@@ -273,7 +273,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
}
if (IS_ERR(dentry)) {
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
}
@@ -282,7 +282,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
static struct dentry *failed_creating(struct dentry *dentry)
{
- mutex_unlock(&d_inode(dentry->d_parent)->i_mutex);
+ inode_unlock(d_inode(dentry->d_parent));
dput(dentry);
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
return NULL;
@@ -290,7 +290,7 @@ static struct dentry *failed_creating(struct dentry *dentry)
static struct dentry *end_creating(struct dentry *dentry)
{
- mutex_unlock(&d_inode(dentry->d_parent)->i_mutex);
+ inode_unlock(d_inode(dentry->d_parent));
return dentry;
}
@@ -560,9 +560,9 @@ void debugfs_remove(struct dentry *dentry)
if (!parent || d_really_is_negative(parent))
return;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
ret = __debugfs_remove(dentry, parent);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
if (!ret)
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
}
@@ -594,7 +594,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
parent = dentry;
down:
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
loop:
/*
* The parent->d_subdirs is protected by the d_lock. Outside that
@@ -609,7 +609,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
/* perhaps simple_empty(child) makes more sense */
if (!list_empty(&child->d_subdirs)) {
spin_unlock(&parent->d_lock);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
parent = child;
goto down;
}
@@ -630,10 +630,10 @@ void debugfs_remove_recursive(struct dentry *dentry)
}
spin_unlock(&parent->d_lock);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
child = parent;
parent = parent->d_parent;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
if (child != dentry)
/* go up */
@@ -641,7 +641,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
if (!__debugfs_remove(child, parent))
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
}
EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 706de324f..655f21f99 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -255,7 +255,7 @@ static int mknod_ptmx(struct super_block *sb)
if (!uid_valid(root_uid) || !gid_valid(root_gid))
return -EINVAL;
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
/* If we have already created ptmx node, return */
if (fsi->ptmx_dentry) {
@@ -292,7 +292,7 @@ static int mknod_ptmx(struct super_block *sb)
fsi->ptmx_dentry = dentry;
rc = 0;
out:
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
return rc;
}
@@ -635,7 +635,7 @@ struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
sprintf(s, "%d", index);
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
dentry = d_alloc_name(root, s);
if (dentry) {
@@ -646,7 +646,7 @@ struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
inode = ERR_PTR(-ENOMEM);
}
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
return inode;
}
@@ -691,7 +691,7 @@ void devpts_pty_kill(struct inode *inode)
BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR));
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
dentry = d_find_alias(inode);
@@ -700,7 +700,7 @@ void devpts_pty_kill(struct inode *inode)
dput(dentry); /* d_alloc_name() in devpts_pty_new() */
dput(dentry); /* d_find_alias above */
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
}
static int __init init_devpts_fs(void)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 01171d8a6..d6a9012d4 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1157,12 +1157,12 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
iocb->ki_filp->f_mapping;
/* will be released by direct_io_worker */
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
retval = filemap_write_and_wait_range(mapping, offset,
end - 1);
if (retval) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
kmem_cache_free(dio_cache, dio);
goto out;
}
@@ -1173,7 +1173,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
dio->i_size = i_size_read(inode);
if (iov_iter_rw(iter) == READ && offset >= dio->i_size) {
if (dio->flags & DIO_LOCKING)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
kmem_cache_free(dio_cache, dio);
retval = 0;
goto out;
@@ -1295,7 +1295,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
* of protecting us from looking up uninitialized blocks.
*/
if (iov_iter_rw(iter) == READ && (dio->flags & DIO_LOCKING))
- mutex_unlock(&dio->inode->i_mutex);
+ inode_unlock(dio->inode);
/*
* The only time we want to leave bios in flight is when a successful
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 173b3873a..58c2f4a21 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -515,14 +515,9 @@ static ssize_t device_write(struct file *file, const char __user *buf,
if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN)
return -EINVAL;
- kbuf = kzalloc(count + 1, GFP_NOFS);
- if (!kbuf)
- return -ENOMEM;
-
- if (copy_from_user(kbuf, buf, count)) {
- error = -EFAULT;
- goto out_free;
- }
+ kbuf = memdup_user_nul(buf, count);
+ if (IS_ERR(kbuf))
+ return PTR_ERR(kbuf);
if (check_version(kbuf)) {
error = -EBADE;
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 4f591f190..d72d52b90 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -8,7 +8,6 @@
#include <linux/writeback.h>
#include <linux/sysctl.h>
#include <linux/gfp.h>
-#include <linux/export.h>
#include "internal.h"
/* A global variable is a bit ugly, but it keeps the code simple */
@@ -40,12 +39,6 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
iput(toput_inode);
}
-/* For TuxOnIce */
-void drop_pagecache(void)
-{
- iterate_supers(drop_pagecache_sb, NULL);
-}
-
int drop_caches_sysctl_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index e2e47ba5d..4e685ac10 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -41,13 +41,13 @@ static struct dentry *lock_parent(struct dentry *dentry)
struct dentry *dir;
dir = dget_parent(dentry);
- mutex_lock_nested(&(d_inode(dir)->i_mutex), I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
return dir;
}
static void unlock_dir(struct dentry *dir)
{
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(dir);
}
@@ -282,9 +282,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
if (rc) {
ecryptfs_do_unlink(directory_inode, ecryptfs_dentry,
ecryptfs_inode);
- make_bad_inode(ecryptfs_inode);
- unlock_new_inode(ecryptfs_inode);
- iput(ecryptfs_inode);
+ iget_failed(ecryptfs_inode);
goto out;
}
unlock_new_inode(ecryptfs_inode);
@@ -399,11 +397,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
int rc = 0;
lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
- mutex_lock(&d_inode(lower_dir_dentry)->i_mutex);
+ inode_lock(d_inode(lower_dir_dentry));
lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
lower_dir_dentry,
ecryptfs_dentry->d_name.len);
- mutex_unlock(&d_inode(lower_dir_dentry)->i_mutex);
+ inode_unlock(d_inode(lower_dir_dentry));
if (IS_ERR(lower_dentry)) {
rc = PTR_ERR(lower_dentry);
ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
@@ -428,11 +426,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
"filename; rc = [%d]\n", __func__, rc);
goto out;
}
- mutex_lock(&d_inode(lower_dir_dentry)->i_mutex);
+ inode_lock(d_inode(lower_dir_dentry));
lower_dentry = lookup_one_len(encrypted_and_encoded_name,
lower_dir_dentry,
encrypted_and_encoded_name_size);
- mutex_unlock(&d_inode(lower_dir_dentry)->i_mutex);
+ inode_unlock(d_inode(lower_dir_dentry));
if (IS_ERR(lower_dentry)) {
rc = PTR_ERR(lower_dentry);
ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
@@ -674,16 +672,24 @@ out:
return rc ? ERR_PTR(rc) : buf;
}
-static const char *ecryptfs_follow_link(struct dentry *dentry, void **cookie)
+static const char *ecryptfs_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
size_t len;
- char *buf = ecryptfs_readlink_lower(dentry, &len);
+ char *buf;
+
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
+ buf = ecryptfs_readlink_lower(dentry, &len);
if (IS_ERR(buf))
return buf;
fsstack_copy_attr_atime(d_inode(dentry),
d_inode(ecryptfs_dentry_to_lower(dentry)));
buf[len] = '\0';
- return *cookie = buf;
+ set_delayed_call(done, kfree_link, buf);
+ return buf;
}
/**
@@ -863,9 +869,9 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
if (!rc && lower_ia.ia_valid & ATTR_SIZE) {
struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
- mutex_lock(&d_inode(lower_dentry)->i_mutex);
+ inode_lock(d_inode(lower_dentry));
rc = notify_change(lower_dentry, &lower_ia, NULL);
- mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+ inode_unlock(d_inode(lower_dentry));
}
return rc;
}
@@ -964,9 +970,9 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
lower_ia.ia_valid &= ~ATTR_MODE;
- mutex_lock(&d_inode(lower_dentry)->i_mutex);
+ inode_lock(d_inode(lower_dentry));
rc = notify_change(lower_dentry, &lower_ia, NULL);
- mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+ inode_unlock(d_inode(lower_dentry));
out:
fsstack_copy_attr_all(inode, lower_inode);
return rc;
@@ -1042,10 +1048,10 @@ ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name,
rc = -EOPNOTSUPP;
goto out;
}
- mutex_lock(&d_inode(lower_dentry)->i_mutex);
+ inode_lock(d_inode(lower_dentry));
rc = d_inode(lower_dentry)->i_op->getxattr(lower_dentry, name, value,
size);
- mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+ inode_unlock(d_inode(lower_dentry));
out:
return rc;
}
@@ -1069,9 +1075,9 @@ ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size)
rc = -EOPNOTSUPP;
goto out;
}
- mutex_lock(&d_inode(lower_dentry)->i_mutex);
+ inode_lock(d_inode(lower_dentry));
rc = d_inode(lower_dentry)->i_op->listxattr(lower_dentry, list, size);
- mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+ inode_unlock(d_inode(lower_dentry));
out:
return rc;
}
@@ -1086,17 +1092,16 @@ static int ecryptfs_removexattr(struct dentry *dentry, const char *name)
rc = -EOPNOTSUPP;
goto out;
}
- mutex_lock(&d_inode(lower_dentry)->i_mutex);
+ inode_lock(d_inode(lower_dentry));
rc = d_inode(lower_dentry)->i_op->removexattr(lower_dentry, name);
- mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+ inode_unlock(d_inode(lower_dentry));
out:
return rc;
}
const struct inode_operations ecryptfs_symlink_iops = {
.readlink = generic_readlink,
- .follow_link = ecryptfs_follow_link,
- .put_link = kfree_put_link,
+ .get_link = ecryptfs_get_link,
.permission = ecryptfs_permission,
.setattr = ecryptfs_setattr,
.getattr = ecryptfs_getattr_link,
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 4f4d0474b..e25b6b06b 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -663,6 +663,7 @@ static struct ecryptfs_cache_info {
struct kmem_cache **cache;
const char *name;
size_t size;
+ unsigned long flags;
void (*ctor)(void *obj);
} ecryptfs_cache_infos[] = {
{
@@ -684,6 +685,7 @@ static struct ecryptfs_cache_info {
.cache = &ecryptfs_inode_info_cache,
.name = "ecryptfs_inode_cache",
.size = sizeof(struct ecryptfs_inode_info),
+ .flags = SLAB_ACCOUNT,
.ctor = inode_info_init_once,
},
{
@@ -755,8 +757,8 @@ static int ecryptfs_init_kmem_caches(void)
struct ecryptfs_cache_info *info;
info = &ecryptfs_cache_infos[i];
- *(info->cache) = kmem_cache_create(info->name, info->size,
- 0, SLAB_HWCACHE_ALIGN, info->ctor);
+ *(info->cache) = kmem_cache_create(info->name, info->size, 0,
+ SLAB_HWCACHE_ALIGN | info->flags, info->ctor);
if (!*(info->cache)) {
ecryptfs_free_kmem_caches();
ecryptfs_printk(KERN_WARNING, "%s: "
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index caba848ac..c6ced4cbf 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -436,7 +436,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
rc = -ENOMEM;
goto out;
}
- mutex_lock(&lower_inode->i_mutex);
+ inode_lock(lower_inode);
size = lower_inode->i_op->getxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
xattr_virt, PAGE_CACHE_SIZE);
if (size < 0)
@@ -444,7 +444,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt);
rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
xattr_virt, size, 0);
- mutex_unlock(&lower_inode->i_mutex);
+ inode_unlock(lower_inode);
if (rc)
printk(KERN_ERR "Error whilst attempting to write inode size "
"to lower file xattr; rc = [%d]\n", rc);
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index 66842e55c..d48e0d261 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -51,9 +51,9 @@ static ssize_t efivarfs_file_write(struct file *file,
d_delete(file->f_path.dentry);
dput(file->f_path.dentry);
} else {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
i_size_write(inode, datasize + sizeof(attributes));
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
bytes = count;
@@ -148,9 +148,9 @@ efivarfs_ioc_setxflags(struct file *file, void __user *arg)
if (error)
return error;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
inode_set_flags(inode, i_flags, S_IMMUTABLE);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
mnt_drop_write_file(file);
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index abb244b06..dd029d13e 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -164,10 +164,10 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
efivar_entry_size(entry, &size);
efivar_entry_add(entry, &efivarfs_list);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
inode->i_private = entry;
i_size_write(inode, size + sizeof(entry->var.Attributes));
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
d_add(dentry, inode);
return 0;
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index 079d20306..cdf087238 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -151,6 +151,7 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino)
break;
case S_IFLNK:
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_data.a_ops = &efs_symlink_aops;
break;
case S_IFCHR:
diff --git a/fs/efs/super.c b/fs/efs/super.c
index c8411a30f..cb68dac4f 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -94,9 +94,9 @@ static void init_once(void *foo)
static int __init init_inodecache(void)
{
efs_inode_cachep = kmem_cache_create("efs_inode_cache",
- sizeof(struct efs_inode_info),
- 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
- init_once);
+ sizeof(struct efs_inode_info), 0,
+ SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
+ SLAB_ACCOUNT, init_once);
if (efs_inode_cachep == NULL)
return -ENOMEM;
return 0;
diff --git a/fs/efs/symlink.c b/fs/efs/symlink.c
index 75117d0da..4870cc82d 100644
--- a/fs/efs/symlink.c
+++ b/fs/efs/symlink.c
@@ -13,7 +13,7 @@
static int efs_symlink_readpage(struct file *file, struct page *page)
{
- char *link = kmap(page);
+ char *link = page_address(page);
struct buffer_head * bh;
struct inode * inode = page->mapping->host;
efs_block_t size = inode->i_size;
@@ -39,12 +39,10 @@ static int efs_symlink_readpage(struct file *file, struct page *page)
}
link[size] = '\0';
SetPageUptodate(page);
- kunmap(page);
unlock_page(page);
return 0;
fail:
SetPageError(page);
- kunmap(page);
unlock_page(page);
return err;
}
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 8d0c0df01..ed70cf9fd 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -45,10 +45,10 @@ struct eventfd_ctx {
*
* This function is supposed to be called by the kernel in paths that do not
* allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
- * value, and we signal this as overflow condition by returining a POLLERR
+ * value, and we signal this as overflow condition by returning a POLLERR
* to poll(2).
*
- * Returns the amount by which the counter was incrememnted. This will be less
+ * Returns the amount by which the counter was incremented. This will be less
* than @n if the counter has overflowed.
*/
__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1e009cad8..cde60741c 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -92,7 +92,12 @@
*/
/* Epoll private bits inside the event mask */
-#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET)
+#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET | EPOLLEXCLUSIVE)
+
+#define EPOLLINOUT_BITS (POLLIN | POLLOUT)
+
+#define EPOLLEXCLUSIVE_OK_BITS (EPOLLINOUT_BITS | POLLERR | POLLHUP | \
+ EPOLLWAKEUP | EPOLLET | EPOLLEXCLUSIVE)
/* Maximum number of nesting allowed inside epoll sets */
#define EP_MAX_NESTS 4
@@ -1002,6 +1007,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
unsigned long flags;
struct epitem *epi = ep_item_from_wait(wait);
struct eventpoll *ep = epi->ep;
+ int ewake = 0;
if ((unsigned long)key & POLLFREE) {
ep_pwq_from_wait(wait)->whead = NULL;
@@ -1066,8 +1072,25 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
* Wake up ( if active ) both the eventpoll wait list and the ->poll()
* wait list.
*/
- if (waitqueue_active(&ep->wq))
+ if (waitqueue_active(&ep->wq)) {
+ if ((epi->event.events & EPOLLEXCLUSIVE) &&
+ !((unsigned long)key & POLLFREE)) {
+ switch ((unsigned long)key & EPOLLINOUT_BITS) {
+ case POLLIN:
+ if (epi->event.events & POLLIN)
+ ewake = 1;
+ break;
+ case POLLOUT:
+ if (epi->event.events & POLLOUT)
+ ewake = 1;
+ break;
+ case 0:
+ ewake = 1;
+ break;
+ }
+ }
wake_up_locked(&ep->wq);
+ }
if (waitqueue_active(&ep->poll_wait))
pwake++;
@@ -1078,6 +1101,9 @@ out_unlock:
if (pwake)
ep_poll_safewake(&ep->poll_wait);
+ if (epi->event.events & EPOLLEXCLUSIVE)
+ return ewake;
+
return 1;
}
@@ -1095,7 +1121,10 @@ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
pwq->whead = whead;
pwq->base = epi;
- add_wait_queue(whead, &pwq->wait);
+ if (epi->event.events & EPOLLEXCLUSIVE)
+ add_wait_queue_exclusive(whead, &pwq->wait);
+ else
+ add_wait_queue(whead, &pwq->wait);
list_add_tail(&pwq->llink, &epi->pwqlist);
epi->nwait++;
} else {
@@ -1862,6 +1891,19 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
goto error_tgt_fput;
/*
+ * epoll adds to the wakeup queue at EPOLL_CTL_ADD time only,
+ * so EPOLLEXCLUSIVE is not allowed for an EPOLL_CTL_MOD operation.
+ * Also, we do not currently support nested exclusive wakeups.
+ */
+ if (epds.events & EPOLLEXCLUSIVE) {
+ if (op == EPOLL_CTL_MOD)
+ goto error_tgt_fput;
+ if (op == EPOLL_CTL_ADD && (is_file_epoll(tf.file) ||
+ (epds.events & ~EPOLLEXCLUSIVE_OK_BITS)))
+ goto error_tgt_fput;
+ }
+
+ /*
* At this point it is safe to assume that the "private_data" contains
* our own data structure.
*/
@@ -1932,8 +1974,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
break;
case EPOLL_CTL_MOD:
if (epi) {
- epds.events |= POLLERR | POLLHUP;
- error = ep_modify(ep, epi, &epds);
+ if (!(epi->event.events & EPOLLEXCLUSIVE)) {
+ epds.events |= POLLERR | POLLHUP;
+ error = ep_modify(ep, epi, &epds);
+ }
} else
error = -ENOENT;
break;
diff --git a/fs/exec.c b/fs/exec.c
index 203f822aa..2af88108e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -106,7 +106,6 @@ bool path_noexec(const struct path *path)
return (path->mnt->mnt_flags & MNT_NOEXEC) ||
(path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
}
-EXPORT_SYMBOL_GPL(path_noexec);
#ifdef CONFIG_USELIB
/*
@@ -123,7 +122,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
int error = PTR_ERR(tmp);
static const struct open_flags uselib_flags = {
.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
- .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN,
+ .acc_mode = MAY_READ | MAY_EXEC,
.intent = LOOKUP_OPEN,
.lookup_flags = LOOKUP_FOLLOW,
};
@@ -767,7 +766,7 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
int err;
struct open_flags open_exec_flags = {
.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
- .acc_mode = MAY_EXEC | MAY_OPEN,
+ .acc_mode = MAY_EXEC,
.intent = LOOKUP_OPEN,
.lookup_flags = LOOKUP_FOLLOW,
};
@@ -1314,13 +1313,13 @@ static void bprm_fill_uid(struct linux_binprm *bprm)
return;
/* Be careful if suid/sgid is set */
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* reload atomically mode/uid/gid now that lock held */
mode = inode->i_mode;
uid = inode->i_uid;
gid = inode->i_gid;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/* We ignore suid/sgid if there are no mappings for them in the ns */
if (!kuid_has_mapping(bprm->cred->user_ns, uid) ||
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 906de66e8..28645f064 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -52,9 +52,9 @@ static int exofs_file_fsync(struct file *filp, loff_t start, loff_t end,
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = sync_inode_metadata(filp->f_mapping->host, 1);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 60f03b789..9eaf595ae 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1224,6 +1224,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
inode->i_link = (char *)oi->i_data;
} else {
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &exofs_aops;
}
} else {
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 994e078da..c20d77df2 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -111,6 +111,7 @@ static int exofs_symlink(struct inode *dir, struct dentry *dentry,
if (l > sizeof(oi->i_data)) {
/* slow symlink */
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &exofs_aops;
memset(oi->i_data, 0, sizeof(oi->i_data));
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index b795c567b..6658a5053 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -194,8 +194,8 @@ static int init_inodecache(void)
{
exofs_inode_cachep = kmem_cache_create("exofs_inode_cache",
sizeof(struct exofs_i_info), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
- exofs_init_once);
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD |
+ SLAB_ACCOUNT, exofs_init_once);
if (exofs_inode_cachep == NULL)
return -ENOMEM;
return 0;
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 714cd37a6..c46f1a190 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -124,10 +124,10 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
int err;
parent = ERR_PTR(-EACCES);
- mutex_lock(&dentry->d_inode->i_mutex);
+ inode_lock(dentry->d_inode);
if (mnt->mnt_sb->s_export_op->get_parent)
parent = mnt->mnt_sb->s_export_op->get_parent(dentry);
- mutex_unlock(&dentry->d_inode->i_mutex);
+ inode_unlock(dentry->d_inode);
if (IS_ERR(parent)) {
dprintk("%s: get_parent of %ld failed, err %d\n",
@@ -143,9 +143,9 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
if (err)
goto out_err;
dprintk("%s: found name: %s\n", __func__, nbuf);
- mutex_lock(&parent->d_inode->i_mutex);
+ inode_lock(parent->d_inode);
tmp = lookup_one_len(nbuf, parent, strlen(nbuf));
- mutex_unlock(&parent->d_inode->i_mutex);
+ inode_unlock(parent->d_inode);
if (IS_ERR(tmp)) {
dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp));
goto out_err;
@@ -503,10 +503,10 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
*/
err = exportfs_get_name(mnt, target_dir, nbuf, result);
if (!err) {
- mutex_lock(&target_dir->d_inode->i_mutex);
+ inode_lock(target_dir->d_inode);
nresult = lookup_one_len(nbuf, target_dir,
strlen(nbuf));
- mutex_unlock(&target_dir->d_inode->i_mutex);
+ inode_unlock(target_dir->d_inode);
if (!IS_ERR(nresult)) {
if (nresult->d_inode) {
dput(result);
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 11a42c5a0..c1400b109 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -80,30 +80,13 @@ static int ext2_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
return ret;
}
-static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct inode *inode = file_inode(vma->vm_file);
- struct ext2_inode_info *ei = EXT2_I(inode);
- int ret;
-
- sb_start_pagefault(inode->i_sb);
- file_update_time(vma->vm_file);
- down_read(&ei->dax_sem);
-
- ret = __dax_mkwrite(vma, vmf, ext2_get_block, NULL);
-
- up_read(&ei->dax_sem);
- sb_end_pagefault(inode->i_sb);
- return ret;
-}
-
static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
struct vm_fault *vmf)
{
struct inode *inode = file_inode(vma->vm_file);
struct ext2_inode_info *ei = EXT2_I(inode);
- int ret = VM_FAULT_NOPAGE;
loff_t size;
+ int ret;
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
@@ -113,6 +96,8 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
+ else
+ ret = dax_pfn_mkwrite(vma, vmf);
up_read(&ei->dax_sem);
sb_end_pagefault(inode->i_sb);
@@ -122,7 +107,7 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
static const struct vm_operations_struct ext2_dax_vm_ops = {
.fault = ext2_dax_fault,
.pmd_fault = ext2_dax_pmd_fault,
- .page_mkwrite = ext2_dax_mkwrite,
+ .page_mkwrite = ext2_dax_fault,
.pfn_mkwrite = ext2_dax_pfn_mkwrite,
};
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 0aa9bf6e6..6bd58e6ff 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -737,8 +737,10 @@ static int ext2_get_blocks(struct inode *inode,
* so that it's not found by another thread before it's
* initialised
*/
- err = dax_clear_blocks(inode, le32_to_cpu(chain[depth-1].key),
- 1 << inode->i_blkbits);
+ err = dax_clear_sectors(inode->i_sb->s_bdev,
+ le32_to_cpu(chain[depth-1].key) <<
+ (inode->i_blkbits - 9),
+ 1 << inode->i_blkbits);
if (err) {
mutex_unlock(&ei->truncate_mutex);
goto cleanup;
@@ -874,6 +876,14 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
static int
ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
+#ifdef CONFIG_FS_DAX
+ if (dax_mapping(mapping)) {
+ return dax_writeback_mapping_range(mapping,
+ mapping->host->i_sb->s_bdev,
+ wbc);
+ }
+#endif
+
return mpage_writepages(mapping, wbc, ext2_get_block);
}
@@ -1296,7 +1306,7 @@ void ext2_set_inode_flags(struct inode *inode)
inode->i_flags |= S_NOATIME;
if (flags & EXT2_DIRSYNC_FL)
inode->i_flags |= S_DIRSYNC;
- if (test_opt(inode->i_sb, DAX))
+ if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
inode->i_flags |= S_DAX;
}
@@ -1420,6 +1430,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
sizeof(ei->i_data) - 1);
} else {
inode->i_op = &ext2_symlink_inode_operations;
+ inode_nohighmem(inode);
if (test_opt(inode->i_sb, NOBH))
inode->i_mapping->a_ops = &ext2_nobh_aops;
else
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 5d46c0986..b386af2e4 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -51,10 +51,10 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
flags = ext2_mask_flags(inode->i_mode, flags);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* Is it quota file? Do not allow user to mess with it */
if (IS_NOQUOTA(inode)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
ret = -EPERM;
goto setflags_out;
}
@@ -68,7 +68,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
*/
if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
if (!capable(CAP_LINUX_IMMUTABLE)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
ret = -EPERM;
goto setflags_out;
}
@@ -80,7 +80,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
ext2_set_inode_flags(inode);
inode->i_ctime = CURRENT_TIME_SEC;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
mark_inode_dirty(inode);
setflags_out:
@@ -102,10 +102,10 @@ setflags_out:
goto setversion_out;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
inode->i_ctime = CURRENT_TIME_SEC;
inode->i_generation = generation;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
mark_inode_dirty(inode);
setversion_out:
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 3267a80db..7a2be8f7f 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -183,6 +183,7 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry,
if (l > sizeof (EXT2_I(inode)->i_data)) {
/* slow symlink */
inode->i_op = &ext2_symlink_inode_operations;
+ inode_nohighmem(inode);
if (test_opt(inode->i_sb, NOBH))
inode->i_mapping->a_ops = &ext2_nobh_aops;
else
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 748d35afc..2a188413a 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -203,7 +203,7 @@ static int __init init_inodecache(void)
ext2_inode_cachep = kmem_cache_create("ext2_inode_cache",
sizeof(struct ext2_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
if (ext2_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c
index ae17179f3..3495d8ae4 100644
--- a/fs/ext2/symlink.c
+++ b/fs/ext2/symlink.c
@@ -22,8 +22,7 @@
const struct inode_operations ext2_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.setattr = ext2_setattr,
#ifdef CONFIG_EXT2_FS_XATTR
.setxattr = generic_setxattr,
@@ -35,7 +34,7 @@ const struct inode_operations ext2_symlink_inode_operations = {
const struct inode_operations ext2_fast_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
.setattr = ext2_setattr,
#ifdef CONFIG_EXT2_FS_XATTR
.setxattr = generic_setxattr,
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index fa70848af..f57a7aba3 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -77,10 +77,8 @@
printk("\n"); \
} while (0)
# define ea_bdebug(bh, f...) do { \
- char b[BDEVNAME_SIZE]; \
- printk(KERN_DEBUG "block %s:%lu: ", \
- bdevname(bh->b_bdev, b), \
- (unsigned long) bh->b_blocknr); \
+ printk(KERN_DEBUG "block %pg:%lu: ", \
+ bh->b_bdev, (unsigned long) bh->b_blocknr); \
printk(f); \
printk("\n"); \
} while (0)
@@ -292,16 +290,21 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
const struct xattr_handler *handler =
ext2_xattr_handler(entry->e_name_index);
- if (handler) {
- size_t size = handler->list(handler, dentry, buffer,
- rest, entry->e_name,
- entry->e_name_len);
+ if (handler && (!handler->list || handler->list(dentry))) {
+ const char *prefix = handler->prefix ?: handler->name;
+ size_t prefix_len = strlen(prefix);
+ size_t size = prefix_len + entry->e_name_len + 1;
+
if (buffer) {
if (size > rest) {
error = -ERANGE;
goto cleanup;
}
- buffer += size;
+ memcpy(buffer, prefix, prefix_len);
+ buffer += prefix_len;
+ memcpy(buffer, entry->e_name, entry->e_name_len);
+ buffer += entry->e_name_len;
+ *buffer++ = 0;
}
rest -= size;
}
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index dfb087503..ba97f243b 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -7,29 +7,11 @@
#include <linux/security.h>
#include "xattr.h"
-static size_t
-ext2_xattr_security_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
-{
- const int prefix_len = XATTR_SECURITY_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
-}
-
static int
ext2_xattr_security_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext2_xattr_get(d_inode(dentry), EXT2_XATTR_INDEX_SECURITY, name,
buffer, size);
}
@@ -39,8 +21,6 @@ ext2_xattr_security_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext2_xattr_set(d_inode(dentry), EXT2_XATTR_INDEX_SECURITY, name,
value, size, flags);
}
@@ -71,7 +51,6 @@ ext2_init_security(struct inode *inode, struct inode *dir,
const struct xattr_handler ext2_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
- .list = ext2_xattr_security_list,
.get = ext2_xattr_security_get,
.set = ext2_xattr_security_set,
};
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index 3150dd3a7..2c94d1930 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -8,23 +8,10 @@
#include "ext2.h"
#include "xattr.h"
-static size_t
-ext2_xattr_trusted_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
+static bool
+ext2_xattr_trusted_list(struct dentry *dentry)
{
- const int prefix_len = XATTR_TRUSTED_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (!capable(CAP_SYS_ADMIN))
- return 0;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
+ return capable(CAP_SYS_ADMIN);
}
static int
@@ -32,8 +19,6 @@ ext2_xattr_trusted_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext2_xattr_get(d_inode(dentry), EXT2_XATTR_INDEX_TRUSTED, name,
buffer, size);
}
@@ -43,8 +28,6 @@ ext2_xattr_trusted_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext2_xattr_set(d_inode(dentry), EXT2_XATTR_INDEX_TRUSTED, name,
value, size, flags);
}
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index 339a49bbb..72a2a96d6 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -10,23 +10,10 @@
#include "ext2.h"
#include "xattr.h"
-static size_t
-ext2_xattr_user_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
+static bool
+ext2_xattr_user_list(struct dentry *dentry)
{
- const size_t prefix_len = XATTR_USER_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (!test_opt(dentry->d_sb, XATTR_USER))
- return 0;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_USER_PREFIX, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
+ return test_opt(dentry->d_sb, XATTR_USER);
}
static int
@@ -34,8 +21,6 @@ ext2_xattr_user_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
if (!test_opt(dentry->d_sb, XATTR_USER))
return -EOPNOTSUPP;
return ext2_xattr_get(d_inode(dentry), EXT2_XATTR_INDEX_USER,
@@ -47,8 +32,6 @@ ext2_xattr_user_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
if (!test_opt(dentry->d_sb, XATTR_USER))
return -EOPNOTSUPP;
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index 1a0835073..38f756248 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -384,14 +384,12 @@ int ext4_decrypt(struct page *page)
EXT4_DECRYPT, page->index, page, page);
}
-int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
+int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
+ ext4_fsblk_t pblk, ext4_lblk_t len)
{
struct ext4_crypto_ctx *ctx;
struct page *ciphertext_page = NULL;
struct bio *bio;
- ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
- ext4_fsblk_t pblk = ext4_ext_pblock(ex);
- unsigned int len = ext4_ext_get_actual_len(ex);
int ret, err = 0;
#if 0
@@ -469,3 +467,59 @@ uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size)
return size;
return 0;
}
+
+/*
+ * Validate dentries for encrypted directories to make sure we aren't
+ * potentially caching stale data after a key has been added or
+ * removed.
+ */
+static int ext4_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ struct inode *dir = d_inode(dentry->d_parent);
+ struct ext4_crypt_info *ci = EXT4_I(dir)->i_crypt_info;
+ int dir_has_key, cached_with_key;
+
+ if (!ext4_encrypted_inode(dir))
+ return 0;
+
+ if (ci && ci->ci_keyring_key &&
+ (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
+ (1 << KEY_FLAG_REVOKED) |
+ (1 << KEY_FLAG_DEAD))))
+ ci = NULL;
+
+ /* this should eventually be a flag in d_flags */
+ cached_with_key = dentry->d_fsdata != NULL;
+ dir_has_key = (ci != NULL);
+
+ /*
+ * If the dentry was cached without the key, and it is a
+ * negative dentry, it might be a valid name. We can't check
+ * if the key has since been made available due to locking
+ * reasons, so we fail the validation so ext4_lookup() can do
+ * this check.
+ *
+ * We also fail the validation if the dentry was created with
+ * the key present, but we no longer have the key, or vice versa.
+ */
+ if ((!cached_with_key && d_is_negative(dentry)) ||
+ (!cached_with_key && dir_has_key) ||
+ (cached_with_key && !dir_has_key)) {
+#if 0 /* Revalidation debug */
+ char buf[80];
+ char *cp = simple_dname(dentry, buf, sizeof(buf));
+
+ if (IS_ERR(cp))
+ cp = (char *) "???";
+ pr_err("revalidate: %s %p %d %d %d\n", cp, dentry->d_fsdata,
+ cached_with_key, d_is_negative(dentry),
+ dir_has_key);
+#endif
+ return 0;
+ }
+ return 1;
+}
+
+const struct dentry_operations ext4_encrypted_d_ops = {
+ .d_revalidate = ext4_d_revalidate,
+};
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 1d1bca74f..33f5e2a50 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -111,6 +111,12 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
int dir_has_error = 0;
struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
+ if (ext4_encrypted_inode(inode)) {
+ err = ext4_get_encryption_info(inode);
+ if (err && err != -ENOKEY)
+ return err;
+ }
+
if (is_dx_dir(inode)) {
err = ext4_dx_readdir(file, ctx);
if (err != ERR_BAD_DX_DIR) {
@@ -157,8 +163,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
index, 1);
file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
bh = ext4_bread(NULL, inode, map.m_lblk, 0);
- if (IS_ERR(bh))
- return PTR_ERR(bh);
+ if (IS_ERR(bh)) {
+ err = PTR_ERR(bh);
+ bh = NULL;
+ goto errout;
+ }
}
if (!bh) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index cc7ca4e87..157b458a6 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -378,14 +378,22 @@ struct flex_groups {
#define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
-#define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE 0x004380FF /* User modifiable flags */
+#define EXT4_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE 0x204380FF /* User modifiable flags */
+
+#define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \
+ EXT4_IMMUTABLE_FL | \
+ EXT4_APPEND_FL | \
+ EXT4_NODUMP_FL | \
+ EXT4_NOATIME_FL | \
+ EXT4_PROJINHERIT_FL)
/* Flags that should be inherited by new inodes from their parent. */
#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
- EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
+ EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
+ EXT4_PROJINHERIT_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
@@ -555,10 +563,12 @@ enum {
#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040
/* Request will not result in inode size update (user for fallocate) */
#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
- /* Do not take i_data_sem locking in ext4_map_blocks */
-#define EXT4_GET_BLOCKS_NO_LOCK 0x0100
/* Convert written extents to unwritten */
-#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0200
+#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0100
+ /* Write zeros to newly created written extents */
+#define EXT4_GET_BLOCKS_ZERO 0x0200
+#define EXT4_GET_BLOCKS_CREATE_ZERO (EXT4_GET_BLOCKS_CREATE |\
+ EXT4_GET_BLOCKS_ZERO)
/*
* The bit position of these flags must not overlap with any of the
@@ -616,6 +626,46 @@ enum {
#define EXT4_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16])
#define EXT4_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct ext4_encryption_policy)
+#ifndef FS_IOC_FSGETXATTR
+/* Until the uapi changes get merged for project quota... */
+
+#define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr)
+#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr)
+
+/*
+ * Structure for FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR.
+ */
+struct fsxattr {
+ __u32 fsx_xflags; /* xflags field value (get/set) */
+ __u32 fsx_extsize; /* extsize field value (get/set)*/
+ __u32 fsx_nextents; /* nextents field value (get) */
+ __u32 fsx_projid; /* project identifier (get/set) */
+ unsigned char fsx_pad[12];
+};
+
+/*
+ * Flags for the fsx_xflags field
+ */
+#define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */
+#define FS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */
+#define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */
+#define FS_XFLAG_APPEND 0x00000010 /* all writes append */
+#define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */
+#define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */
+#define FS_XFLAG_NODUMP 0x00000080 /* do not include in backups */
+#define FS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */
+#define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */
+#define FS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
+#define FS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
+#define FS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
+#define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
+#define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
+#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
+#endif /* !defined(FS_IOC_FSGETXATTR) */
+
+#define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR
+#define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR
+
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
* ioctl commands in 32 bit emulation
@@ -910,6 +960,15 @@ struct ext4_inode_info {
* by other means, so we have i_data_sem.
*/
struct rw_semaphore i_data_sem;
+ /*
+ * i_mmap_sem is for serializing page faults with truncate / punch hole
+ * operations. We have to make sure that new page cannot be faulted in
+ * a section of the inode that is being punched. We cannot easily use
+ * i_data_sem for this since we need protection for the whole punch
+ * operation and i_data_sem ranks below transaction start so we have
+ * to occasionally drop it.
+ */
+ struct rw_semaphore i_mmap_sem;
struct inode vfs_inode;
struct jbd2_inode *jinode;
@@ -993,6 +1052,7 @@ struct ext4_inode_info {
/* Encryption params */
struct ext4_crypt_info *i_crypt_info;
#endif
+ kprojid_t i_projid;
};
/*
@@ -1248,7 +1308,7 @@ struct ext4_super_block {
#endif
/* Number of quota types we support */
-#define EXT4_MAXQUOTAS 2
+#define EXT4_MAXQUOTAS 3
/*
* fourth extended-fs super-block data in memory
@@ -1754,7 +1814,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
- EXT4_FEATURE_RO_COMPAT_QUOTA)
+ EXT4_FEATURE_RO_COMPAT_QUOTA |\
+ EXT4_FEATURE_RO_COMPAT_PROJECT)
#define EXTN_FEATURE_FUNCS(ver) \
static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \
@@ -1796,6 +1857,11 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
#define EXT4_DEF_RESUID 0
#define EXT4_DEF_RESGID 0
+/*
+ * Default project ID
+ */
+#define EXT4_DEF_PROJID 0
+
#define EXT4_DEF_INODE_READAHEAD_BLKS 32
/*
@@ -2234,7 +2300,9 @@ void ext4_restore_control_page(struct page *data_page);
struct page *ext4_encrypt(struct inode *inode,
struct page *plaintext_page);
int ext4_decrypt(struct page *page);
-int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex);
+int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
+ ext4_fsblk_t pblk, ext4_lblk_t len);
+extern const struct dentry_operations ext4_encrypted_d_ops;
#ifdef CONFIG_EXT4_FS_ENCRYPTION
int ext4_init_crypto(void);
@@ -2440,8 +2508,8 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
int ext4_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
-int ext4_get_block_dax(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create);
+int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create);
int ext4_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
@@ -2484,9 +2552,13 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
loff_t lstart, loff_t lend);
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
+extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim);
+extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
+ ext4_fsblk_t pblk, ext4_lblk_t len);
/* indirect.c */
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
@@ -2825,7 +2897,7 @@ do { \
static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
{
WARN_ON_ONCE(S_ISREG(inode->i_mode) &&
- !mutex_is_locked(&inode->i_mutex));
+ !inode_is_locked(inode));
down_write(&EXT4_I(inode)->i_data_sem);
if (newsize > EXT4_I(inode)->i_disksize)
EXT4_I(inode)->i_disksize = newsize;
@@ -2848,6 +2920,9 @@ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
return changed;
}
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+ loff_t len);
+
struct ext4_group_info {
unsigned long bb_state;
struct rb_root bb_free_root;
@@ -2986,8 +3061,7 @@ extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
struct page *page);
extern int ext4_try_add_inline_entry(handle_t *handle,
struct ext4_filename *fname,
- struct dentry *dentry,
- struct inode *inode);
+ struct inode *dir, struct inode *inode);
extern int ext4_try_create_inline_dir(handle_t *handle,
struct inode *parent,
struct inode *inode);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 551353b1b..3753ceb0b 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3119,19 +3119,11 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
{
ext4_fsblk_t ee_pblock;
unsigned int ee_len;
- int ret;
ee_len = ext4_ext_get_actual_len(ex);
ee_pblock = ext4_ext_pblock(ex);
-
- if (ext4_encrypted_inode(inode))
- return ext4_encrypted_zeroout(inode, ex);
-
- ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
- if (ret > 0)
- ret = 0;
-
- return ret;
+ return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock,
+ ee_len);
}
/*
@@ -3936,7 +3928,7 @@ static int
convert_initialized_extent(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map,
struct ext4_ext_path **ppath, int flags,
- unsigned int allocated, ext4_fsblk_t newblock)
+ unsigned int allocated)
{
struct ext4_ext_path *path = *ppath;
struct ext4_extent *ex;
@@ -4052,6 +4044,14 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
}
/* IO end_io complete, convert the filled extent to written */
if (flags & EXT4_GET_BLOCKS_CONVERT) {
+ if (flags & EXT4_GET_BLOCKS_ZERO) {
+ if (allocated > map->m_len)
+ allocated = map->m_len;
+ err = ext4_issue_zeroout(inode, map->m_lblk, newblock,
+ allocated);
+ if (err < 0)
+ goto out2;
+ }
ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
ppath);
if (ret >= 0) {
@@ -4347,7 +4347,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
allocated = convert_initialized_extent(
handle, inode, map, &path,
- flags, allocated, newblock);
+ flags, allocated);
goto out2;
} else if (!ext4_ext_is_unwritten(ex))
goto out;
@@ -4685,10 +4685,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
if (len <= EXT_UNWRITTEN_MAX_LEN)
flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
- /* Wait all existing dio workers, newcomers will block on i_mutex */
- ext4_inode_block_unlocked_dio(inode);
- inode_dio_wait(inode);
-
/*
* credits to insert 1 extent into extent tree
*/
@@ -4752,8 +4748,6 @@ retry:
goto retry;
}
- ext4_inode_resume_unlocked_dio(inode);
-
return ret > 0 ? ret2 : ret;
}
@@ -4770,7 +4764,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
int partial_begin, partial_end;
loff_t start, end;
ext4_lblk_t lblk;
- struct address_space *mapping = inode->i_mapping;
unsigned int blkbits = inode->i_blkbits;
trace_ext4_zero_range(inode, offset, len, mode);
@@ -4786,17 +4779,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
}
/*
- * Write out all dirty pages to avoid race conditions
- * Then release them.
- */
- if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
- ret = filemap_write_and_wait_range(mapping, offset,
- offset + len - 1);
- if (ret)
- return ret;
- }
-
- /*
* Round up offset. This is not fallocate, we neet to zero out
* blocks, so convert interior block aligned part of the range to
* unwritten and possibly manually zero out unaligned parts of the
@@ -4817,7 +4799,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
else
max_blocks -= lblk;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* Indirect files do not support unwritten extnets
@@ -4839,6 +4821,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
if (mode & FALLOC_FL_KEEP_SIZE)
flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
+ /* Wait all existing dio workers, newcomers will block on i_mutex */
+ ext4_inode_block_unlocked_dio(inode);
+ inode_dio_wait(inode);
+
/* Preallocate the range including the unaligned edges */
if (partial_begin || partial_end) {
ret = ext4_alloc_file_blocks(file,
@@ -4847,7 +4833,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
round_down(offset, 1 << blkbits)) >> blkbits,
new_size, flags, mode);
if (ret)
- goto out_mutex;
+ goto out_dio;
}
@@ -4856,16 +4842,23 @@ static long ext4_zero_range(struct file *file, loff_t offset,
flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
EXT4_EX_NOCACHE);
- /* Now release the pages and zero block aligned part of pages*/
+ /*
+ * Prevent page faults from reinstantiating pages we have
+ * released from page cache.
+ */
+ down_write(&EXT4_I(inode)->i_mmap_sem);
+ ret = ext4_update_disksize_before_punch(inode, offset, len);
+ if (ret) {
+ up_write(&EXT4_I(inode)->i_mmap_sem);
+ goto out_dio;
+ }
+ /* Now release the pages and zero block aligned part of pages */
truncate_pagecache_range(inode, start, end - 1);
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
- /* Wait all existing dio workers, newcomers will block on i_mutex */
- ext4_inode_block_unlocked_dio(inode);
- inode_dio_wait(inode);
-
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
flags, mode);
+ up_write(&EXT4_I(inode)->i_mmap_sem);
if (ret)
goto out_dio;
}
@@ -4909,7 +4902,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
out_dio:
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
@@ -4980,7 +4973,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (mode & FALLOC_FL_KEEP_SIZE)
flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* We only support preallocation for extent-based files only
@@ -4998,8 +4991,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
goto out;
}
+ /* Wait all existing dio workers, newcomers will block on i_mutex */
+ ext4_inode_block_unlocked_dio(inode);
+ inode_dio_wait(inode);
+
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
flags, mode);
+ ext4_inode_resume_unlocked_dio(inode);
if (ret)
goto out;
@@ -5008,7 +5006,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
EXT4_I(inode)->i_sync_tid);
}
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
return ret;
}
@@ -5494,21 +5492,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
return ret;
}
- /*
- * Need to round down offset to be aligned with page size boundary
- * for page size > block size.
- */
- ioffset = round_down(offset, PAGE_SIZE);
-
- /* Write out all dirty pages */
- ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
- LLONG_MAX);
- if (ret)
- return ret;
-
- /* Take mutex lock */
- mutex_lock(&inode->i_mutex);
-
+ inode_lock(inode);
/*
* There is no need to overlap collapse range with EOF, in which case
* it is effectively a truncate operation
@@ -5524,17 +5508,43 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
goto out_mutex;
}
- truncate_pagecache(inode, ioffset);
-
/* Wait for existing dio to complete */
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
+ /*
+ * Prevent page faults from reinstantiating pages we have released from
+ * page cache.
+ */
+ down_write(&EXT4_I(inode)->i_mmap_sem);
+ /*
+ * Need to round down offset to be aligned with page size boundary
+ * for page size > block size.
+ */
+ ioffset = round_down(offset, PAGE_SIZE);
+ /*
+ * Write tail of the last page before removed range since it will get
+ * removed from the page cache below.
+ */
+ ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset);
+ if (ret)
+ goto out_mmap;
+ /*
+ * Write data that will be shifted to preserve them when discarding
+ * page cache below. We are also protected from pages becoming dirty
+ * by i_mmap_sem.
+ */
+ ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
+ LLONG_MAX);
+ if (ret)
+ goto out_mmap;
+ truncate_pagecache(inode, ioffset);
+
credits = ext4_writepage_trans_blocks(inode);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
- goto out_dio;
+ goto out_mmap;
}
down_write(&EXT4_I(inode)->i_data_sem);
@@ -5573,10 +5583,11 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
out_stop:
ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+ up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
@@ -5627,21 +5638,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
return ret;
}
- /*
- * Need to round down to align start offset to page size boundary
- * for page size > block size.
- */
- ioffset = round_down(offset, PAGE_SIZE);
-
- /* Write out all dirty pages */
- ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
- LLONG_MAX);
- if (ret)
- return ret;
-
- /* Take mutex lock */
- mutex_lock(&inode->i_mutex);
-
+ inode_lock(inode);
/* Currently just for extent based files */
if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
ret = -EOPNOTSUPP;
@@ -5660,17 +5657,32 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
goto out_mutex;
}
- truncate_pagecache(inode, ioffset);
-
/* Wait for existing dio to complete */
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
+ /*
+ * Prevent page faults from reinstantiating pages we have released from
+ * page cache.
+ */
+ down_write(&EXT4_I(inode)->i_mmap_sem);
+ /*
+ * Need to round down to align start offset to page size boundary
+ * for page size > block size.
+ */
+ ioffset = round_down(offset, PAGE_SIZE);
+ /* Write out all dirty pages */
+ ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+ LLONG_MAX);
+ if (ret)
+ goto out_mmap;
+ truncate_pagecache(inode, ioffset);
+
credits = ext4_writepage_trans_blocks(inode);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
- goto out_dio;
+ goto out_mmap;
}
/* Expand file to avoid data loss if there is error while shifting */
@@ -5741,10 +5753,11 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
out_stop:
ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+ up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
@@ -5779,8 +5792,8 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
- BUG_ON(!mutex_is_locked(&inode1->i_mutex));
- BUG_ON(!mutex_is_locked(&inode2->i_mutex));
+ BUG_ON(!inode_is_locked(inode1));
+ BUG_ON(!inode_is_locked(inode2));
*erp = ext4_es_remove_extent(inode1, lblk1, count);
if (unlikely(*erp))
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 113837e7b..4cd318f31 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -113,7 +113,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
ext4_unwritten_wait(inode);
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret <= 0)
goto out;
@@ -169,7 +169,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
}
ret = __generic_file_write_iter(iocb, from);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (ret > 0) {
ssize_t err;
@@ -186,50 +186,42 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
return ret;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (aio_mutex)
mutex_unlock(aio_mutex);
return ret;
}
#ifdef CONFIG_FS_DAX
-static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
-{
- struct inode *inode = bh->b_assoc_map->host;
- /* XXX: breaks on 32-bit > 16TB. Is that even supported? */
- loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
- int err;
- if (!uptodate)
- return;
- WARN_ON(!buffer_unwritten(bh));
- err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
-}
-
static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
int result;
handle_t *handle = NULL;
- struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+ struct inode *inode = file_inode(vma->vm_file);
+ struct super_block *sb = inode->i_sb;
bool write = vmf->flags & FAULT_FLAG_WRITE;
if (write) {
sb_start_pagefault(sb);
file_update_time(vma->vm_file);
+ down_read(&EXT4_I(inode)->i_mmap_sem);
handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
EXT4_DATA_TRANS_BLOCKS(sb));
- }
+ } else
+ down_read(&EXT4_I(inode)->i_mmap_sem);
if (IS_ERR(handle))
result = VM_FAULT_SIGBUS;
else
- result = __dax_fault(vma, vmf, ext4_get_block_dax,
- ext4_end_io_unwritten);
+ result = __dax_fault(vma, vmf, ext4_dax_mmap_get_block, NULL);
if (write) {
if (!IS_ERR(handle))
ext4_journal_stop(handle);
+ up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb);
- }
+ } else
+ up_read(&EXT4_I(inode)->i_mmap_sem);
return result;
}
@@ -246,44 +238,73 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
if (write) {
sb_start_pagefault(sb);
file_update_time(vma->vm_file);
+ down_read(&EXT4_I(inode)->i_mmap_sem);
handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
ext4_chunk_trans_blocks(inode,
PMD_SIZE / PAGE_SIZE));
- }
+ } else
+ down_read(&EXT4_I(inode)->i_mmap_sem);
if (IS_ERR(handle))
result = VM_FAULT_SIGBUS;
else
result = __dax_pmd_fault(vma, addr, pmd, flags,
- ext4_get_block_dax, ext4_end_io_unwritten);
+ ext4_dax_mmap_get_block, NULL);
if (write) {
if (!IS_ERR(handle))
ext4_journal_stop(handle);
+ up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb);
- }
+ } else
+ up_read(&EXT4_I(inode)->i_mmap_sem);
return result;
}
-static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+/*
+ * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_fault()
+ * handler we check for races against truncate. Note that since we cycle through
+ * i_mmap_sem, we are sure that also any hole punching that began before we
+ * were called is finished by now and so if it included part of the file we
+ * are working on, our pte will get unmapped and the check for pte_same() in
+ * wp_pfn_shared() fails. Thus fault gets retried and things work out as
+ * desired.
+ */
+static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
+ struct vm_fault *vmf)
{
- return dax_mkwrite(vma, vmf, ext4_get_block_dax,
- ext4_end_io_unwritten);
+ struct inode *inode = file_inode(vma->vm_file);
+ struct super_block *sb = inode->i_sb;
+ loff_t size;
+ int ret;
+
+ sb_start_pagefault(sb);
+ file_update_time(vma->vm_file);
+ down_read(&EXT4_I(inode)->i_mmap_sem);
+ size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ if (vmf->pgoff >= size)
+ ret = VM_FAULT_SIGBUS;
+ else
+ ret = dax_pfn_mkwrite(vma, vmf);
+ up_read(&EXT4_I(inode)->i_mmap_sem);
+ sb_end_pagefault(sb);
+
+ return ret;
}
static const struct vm_operations_struct ext4_dax_vm_ops = {
.fault = ext4_dax_fault,
.pmd_fault = ext4_dax_pmd_fault,
- .page_mkwrite = ext4_dax_mkwrite,
- .pfn_mkwrite = dax_pfn_mkwrite,
+ .page_mkwrite = ext4_dax_fault,
+ .pfn_mkwrite = ext4_dax_pfn_mkwrite,
};
#else
#define ext4_dax_vm_ops ext4_file_vm_ops
#endif
static const struct vm_operations_struct ext4_file_vm_ops = {
- .fault = filemap_fault,
+ .fault = ext4_filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = ext4_page_mkwrite,
};
@@ -314,6 +335,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
struct super_block *sb = inode->i_sb;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct vfsmount *mnt = filp->f_path.mnt;
+ struct inode *dir = filp->f_path.dentry->d_parent->d_inode;
struct path path;
char buf[64], *cp;
int ret;
@@ -357,6 +379,14 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
if (ext4_encryption_info(inode) == NULL)
return -ENOKEY;
}
+ if (ext4_encrypted_inode(dir) &&
+ !ext4_is_child_context_consistent_with_parent(dir, inode)) {
+ ext4_warning(inode->i_sb,
+ "Inconsistent encryption contexts: %lu/%lu\n",
+ (unsigned long) dir->i_ino,
+ (unsigned long) inode->i_ino);
+ return -EPERM;
+ }
/*
* Set up the jbd2_inode if we are opening the inode for
* writing and the journal is present
@@ -527,11 +557,11 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
int blkbits;
int ret = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
isize = i_size_read(inode);
if (offset >= isize) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return -ENXIO;
}
@@ -579,7 +609,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
dataoff = (loff_t)last << blkbits;
} while (last <= end);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (dataoff > isize)
return -ENXIO;
@@ -600,11 +630,11 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
int blkbits;
int ret = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
isize = i_size_read(inode);
if (offset >= isize) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return -ENXIO;
}
@@ -655,7 +685,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
break;
} while (last <= end);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (holeoff > isize)
holeoff = isize;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 53f2b98a6..acc0ad56b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -801,6 +801,13 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
inode->i_gid = dir->i_gid;
} else
inode_init_owner(inode, dir, mode);
+
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT))
+ ei->i_projid = EXT4_I(dir)->i_projid;
+ else
+ ei->i_projid = make_kprojid(&init_user_ns, EXT4_DEF_PROJID);
+
err = dquot_initialize(inode);
if (err)
goto out;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index d884989cc..dfe3b9baf 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -995,12 +995,11 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
*/
static int ext4_add_dirent_to_inline(handle_t *handle,
struct ext4_filename *fname,
- struct dentry *dentry,
+ struct inode *dir,
struct inode *inode,
struct ext4_iloc *iloc,
void *inline_start, int inline_size)
{
- struct inode *dir = d_inode(dentry->d_parent);
int err;
struct ext4_dir_entry_2 *de;
@@ -1245,12 +1244,11 @@ out:
* the new created block.
*/
int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
- struct dentry *dentry, struct inode *inode)
+ struct inode *dir, struct inode *inode)
{
int ret, inline_size;
void *inline_start;
struct ext4_iloc iloc;
- struct inode *dir = d_inode(dentry->d_parent);
ret = ext4_get_inode_loc(dir, &iloc);
if (ret)
@@ -1264,7 +1262,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
EXT4_INLINE_DOTDOT_SIZE;
inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
- ret = ext4_add_dirent_to_inline(handle, fname, dentry, inode, &iloc,
+ ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc,
inline_start, inline_size);
if (ret != -ENOSPC)
goto out;
@@ -1285,7 +1283,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
if (inline_size) {
inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
- ret = ext4_add_dirent_to_inline(handle, fname, dentry,
+ ret = ext4_add_dirent_to_inline(handle, fname, dir,
inode, &iloc, inline_start,
inline_size);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 06bda0361..aee960b1a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -383,6 +383,21 @@ static int __check_block_validity(struct inode *inode, const char *func,
return 0;
}
+int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk,
+ ext4_lblk_t len)
+{
+ int ret;
+
+ if (ext4_encrypted_inode(inode))
+ return ext4_encrypted_zeroout(inode, lblk, pblk, len);
+
+ ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS);
+ if (ret > 0)
+ ret = 0;
+
+ return ret;
+}
+
#define check_block_validity(inode, map) \
__check_block_validity((inode), __func__, __LINE__, (map))
@@ -403,8 +418,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
* out taking i_data_sem. So at the time the unwritten extent
* could be converted.
*/
- if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
- down_read(&EXT4_I(inode)->i_data_sem);
+ down_read(&EXT4_I(inode)->i_data_sem);
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
retval = ext4_ext_map_blocks(handle, inode, map, flags &
EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -412,8 +426,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
retval = ext4_ind_map_blocks(handle, inode, map, flags &
EXT4_GET_BLOCKS_KEEP_SIZE);
}
- if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
- up_read((&EXT4_I(inode)->i_data_sem));
+ up_read((&EXT4_I(inode)->i_data_sem));
/*
* We don't check m_len because extent will be collpased in status
@@ -509,8 +522,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
* Try to see if we can get the block without requesting a new
* file system block.
*/
- if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
- down_read(&EXT4_I(inode)->i_data_sem);
+ down_read(&EXT4_I(inode)->i_data_sem);
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
retval = ext4_ext_map_blocks(handle, inode, map, flags &
EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -541,8 +553,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
if (ret < 0)
retval = ret;
}
- if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
- up_read((&EXT4_I(inode)->i_data_sem));
+ up_read((&EXT4_I(inode)->i_data_sem));
found:
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
@@ -626,13 +637,29 @@ found:
}
/*
+ * We have to zeroout blocks before inserting them into extent
+ * status tree. Otherwise someone could look them up there and
+ * use them before they are really zeroed.
+ */
+ if (flags & EXT4_GET_BLOCKS_ZERO &&
+ map->m_flags & EXT4_MAP_MAPPED &&
+ map->m_flags & EXT4_MAP_NEW) {
+ ret = ext4_issue_zeroout(inode, map->m_lblk,
+ map->m_pblk, map->m_len);
+ if (ret) {
+ retval = ret;
+ goto out_sem;
+ }
+ }
+
+ /*
* If the extent has been zeroed out, we don't need to update
* extent status tree.
*/
if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
if (ext4_es_is_written(&es))
- goto has_zeroout;
+ goto out_sem;
}
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
@@ -643,11 +670,13 @@ found:
status |= EXTENT_STATUS_DELAYED;
ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
map->m_pblk, status);
- if (ret < 0)
+ if (ret < 0) {
retval = ret;
+ goto out_sem;
+ }
}
-has_zeroout:
+out_sem:
up_write((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
ret = check_block_validity(inode, map);
@@ -702,7 +731,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
map.m_lblk = iblock;
map.m_len = bh->b_size >> inode->i_blkbits;
- if (flags && !(flags & EXT4_GET_BLOCKS_NO_LOCK) && !handle) {
+ if (flags && !handle) {
/* Direct IO write... */
if (map.m_len > DIO_MAX_BLOCKS)
map.m_len = DIO_MAX_BLOCKS;
@@ -722,16 +751,6 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
map_bh(bh, inode->i_sb, map.m_pblk);
ext4_update_bh_state(bh, map.m_flags);
- if (IS_DAX(inode) && buffer_unwritten(bh)) {
- /*
- * dgc: I suspect unwritten conversion on ext4+DAX is
- * fundamentally broken here when there are concurrent
- * read/write in progress on this inode.
- */
- WARN_ON_ONCE(io_end);
- bh->b_assoc_map = inode->i_mapping;
- bh->b_private = (void *)(unsigned long)iblock;
- }
if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
set_buffer_defer_completion(bh);
bh->b_size = inode->i_sb->s_blocksize * map.m_len;
@@ -907,9 +926,6 @@ int do_journal_get_write_access(handle_t *handle,
return ret;
}
-static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create);
-
#ifdef CONFIG_EXT4_FS_ENCRYPTION
static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block)
@@ -2462,6 +2478,10 @@ static int ext4_writepages(struct address_space *mapping,
trace_ext4_writepages(inode, wbc);
+ if (dax_mapping(mapping))
+ return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
+ wbc);
+
/*
* No pages to write? This is mainly a kludge to avoid starting
* a transaction for special inodes like journal inode on last iput()
@@ -3082,25 +3102,96 @@ int ext4_get_block_write(struct inode *inode, sector_t iblock,
EXT4_GET_BLOCKS_IO_CREATE_EXT);
}
-static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
+static int ext4_get_block_overwrite(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
- ext4_debug("ext4_get_block_write_nolock: inode %lu, create flag %d\n",
+ int ret;
+
+ ext4_debug("ext4_get_block_overwrite: inode %lu, create flag %d\n",
inode->i_ino, create);
- return _ext4_get_block(inode, iblock, bh_result,
- EXT4_GET_BLOCKS_NO_LOCK);
+ ret = _ext4_get_block(inode, iblock, bh_result, 0);
+ /*
+ * Blocks should have been preallocated! ext4_file_write_iter() checks
+ * that.
+ */
+ WARN_ON_ONCE(!buffer_mapped(bh_result));
+
+ return ret;
}
-int ext4_get_block_dax(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
+#ifdef CONFIG_FS_DAX
+int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
{
- int flags = EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_UNWRIT_EXT;
- if (create)
- flags |= EXT4_GET_BLOCKS_CREATE;
- ext4_debug("ext4_get_block_dax: inode %lu, create flag %d\n",
+ int ret, err;
+ int credits;
+ struct ext4_map_blocks map;
+ handle_t *handle = NULL;
+ int flags = 0;
+
+ ext4_debug("ext4_dax_mmap_get_block: inode %lu, create flag %d\n",
inode->i_ino, create);
- return _ext4_get_block(inode, iblock, bh_result, flags);
+ map.m_lblk = iblock;
+ map.m_len = bh_result->b_size >> inode->i_blkbits;
+ credits = ext4_chunk_trans_blocks(inode, map.m_len);
+ if (create) {
+ flags |= EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_CREATE_ZERO;
+ handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ return ret;
+ }
+ }
+
+ ret = ext4_map_blocks(handle, inode, &map, flags);
+ if (create) {
+ err = ext4_journal_stop(handle);
+ if (ret >= 0 && err < 0)
+ ret = err;
+ }
+ if (ret <= 0)
+ goto out;
+ if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+ int err2;
+
+ /*
+ * We are protected by i_mmap_sem so we know block cannot go
+ * away from under us even though we dropped i_data_sem.
+ * Convert extent to written and write zeros there.
+ *
+ * Note: We may get here even when create == 0.
+ */
+ handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto out;
+ }
+
+ err = ext4_map_blocks(handle, inode, &map,
+ EXT4_GET_BLOCKS_CONVERT | EXT4_GET_BLOCKS_CREATE_ZERO);
+ if (err < 0)
+ ret = err;
+ err2 = ext4_journal_stop(handle);
+ if (err2 < 0 && ret > 0)
+ ret = err2;
+ }
+out:
+ WARN_ON_ONCE(ret == 0 && create);
+ if (ret > 0) {
+ map_bh(bh_result, inode->i_sb, map.m_pblk);
+ bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) |
+ map.m_flags;
+ /*
+ * At least for now we have to clear BH_New so that DAX code
+ * doesn't attempt to zero blocks again in a racy way.
+ */
+ bh_result->b_state &= ~(1 << BH_New);
+ bh_result->b_size = map.m_len << inode->i_blkbits;
+ ret = 0;
+ }
+ return ret;
}
+#endif
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private)
@@ -3171,10 +3262,8 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
/* If we do a overwrite dio, i_mutex locking can be released */
overwrite = *((int *)iocb->private);
- if (overwrite) {
- down_read(&EXT4_I(inode)->i_data_sem);
- mutex_unlock(&inode->i_mutex);
- }
+ if (overwrite)
+ inode_unlock(inode);
/*
* We could direct write to holes and fallocate.
@@ -3196,29 +3285,29 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
* case, we allocate an io_end structure to hook to the iocb.
*/
iocb->private = NULL;
- ext4_inode_aio_set(inode, NULL);
- if (!is_sync_kiocb(iocb)) {
- io_end = ext4_init_io_end(inode, GFP_NOFS);
- if (!io_end) {
- ret = -ENOMEM;
- goto retake_lock;
- }
- /*
- * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
- */
- iocb->private = ext4_get_io_end(io_end);
- /*
- * we save the io structure for current async direct
- * IO, so that later ext4_map_blocks() could flag the
- * io structure whether there is a unwritten extents
- * needs to be converted when IO is completed.
- */
- ext4_inode_aio_set(inode, io_end);
- }
-
if (overwrite) {
- get_block_func = ext4_get_block_write_nolock;
+ get_block_func = ext4_get_block_overwrite;
} else {
+ ext4_inode_aio_set(inode, NULL);
+ if (!is_sync_kiocb(iocb)) {
+ io_end = ext4_init_io_end(inode, GFP_NOFS);
+ if (!io_end) {
+ ret = -ENOMEM;
+ goto retake_lock;
+ }
+ /*
+ * Grab reference for DIO. Will be dropped in
+ * ext4_end_io_dio()
+ */
+ iocb->private = ext4_get_io_end(io_end);
+ /*
+ * we save the io structure for current async direct
+ * IO, so that later ext4_map_blocks() could flag the
+ * io structure whether there is a unwritten extents
+ * needs to be converted when IO is completed.
+ */
+ ext4_inode_aio_set(inode, io_end);
+ }
get_block_func = ext4_get_block_write;
dio_flags = DIO_LOCKING;
}
@@ -3273,10 +3362,8 @@ retake_lock:
if (iov_iter_rw(iter) == WRITE)
inode_dio_end(inode);
/* take i_mutex locking again if we do a ovewrite dio */
- if (overwrite) {
- up_read(&EXT4_I(inode)->i_data_sem);
- mutex_lock(&inode->i_mutex);
- }
+ if (overwrite)
+ inode_lock(inode);
return ret;
}
@@ -3587,6 +3674,35 @@ int ext4_can_truncate(struct inode *inode)
}
/*
+ * We have to make sure i_disksize gets properly updated before we truncate
+ * page cache due to hole punching or zero range. Otherwise i_disksize update
+ * can get lost as it may have been postponed to submission of writeback but
+ * that will never happen after we truncate page cache.
+ */
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+				      loff_t len)
+{
+	loff_t isize = i_size_read(inode);
+	handle_t *handle;
+
+	WARN_ON(!inode_is_locked(inode));
+
+	/*
+	 * Nothing to do unless the range [offset, offset + len] covers
+	 * i_size and i_disksize is still behind i_size.
+	 */
+	if (offset > isize || offset + len < isize ||
+	    EXT4_I(inode)->i_disksize >= isize)
+		return 0;
+
+	handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+
+	/* Update i_disksize from i_size and mark the inode dirty in @handle */
+	ext4_update_i_disksize(inode, isize);
+	ext4_mark_inode_dirty(handle, inode);
+	ext4_journal_stop(handle);
+
+	return 0;
+}
+
+/*
* ext4_punch_hole: punches a hole in a file by releaseing the blocks
* associated with the given offset and length
*
@@ -3623,7 +3739,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
return ret;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* No need to punch hole beyond i_size */
if (offset >= inode->i_size)
@@ -3651,17 +3767,26 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
}
+ /* Wait all existing dio workers, newcomers will block on i_mutex */
+ ext4_inode_block_unlocked_dio(inode);
+ inode_dio_wait(inode);
+
+ /*
+ * Prevent page faults from reinstantiating pages we have released from
+ * page cache.
+ */
+ down_write(&EXT4_I(inode)->i_mmap_sem);
first_block_offset = round_up(offset, sb->s_blocksize);
last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
/* Now release the pages and zero block aligned part of pages*/
- if (last_block_offset > first_block_offset)
+ if (last_block_offset > first_block_offset) {
+ ret = ext4_update_disksize_before_punch(inode, offset, length);
+ if (ret)
+ goto out_dio;
truncate_pagecache_range(inode, first_block_offset,
last_block_offset);
-
- /* Wait all existing dio workers, newcomers will block on i_mutex */
- ext4_inode_block_unlocked_dio(inode);
- inode_dio_wait(inode);
+ }
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
credits = ext4_writepage_trans_blocks(inode);
@@ -3708,19 +3833,15 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
if (IS_SYNC(inode))
ext4_handle_sync(handle);
- /* Now release the pages again to reduce race window */
- if (last_block_offset > first_block_offset)
- truncate_pagecache_range(inode, first_block_offset,
- last_block_offset);
-
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
out_stop:
ext4_journal_stop(handle);
out_dio:
+ up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
@@ -3790,7 +3911,7 @@ void ext4_truncate(struct inode *inode)
* have i_mutex locked because it's not necessary.
*/
if (!(inode->i_state & (I_NEW|I_FREEING)))
- WARN_ON(!mutex_is_locked(&inode->i_mutex));
+ WARN_ON(!inode_is_locked(inode));
trace_ext4_truncate_enter(inode);
if (!ext4_can_truncate(inode))
@@ -4038,7 +4159,7 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
- if (test_opt(inode->i_sb, DAX))
+ if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
new_fl |= S_DAX;
inode_set_flags(inode, new_fl,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
@@ -4104,6 +4225,14 @@ static inline void ext4_iget_extra_inode(struct inode *inode,
EXT4_I(inode)->i_inline_off = 0;
}
+/*
+ * Store the inode's project ID in @projid.  Returns 0 on success, or
+ * -EOPNOTSUPP (leaving @projid untouched) when the filesystem does not have
+ * the PROJECT ro-compat feature.
+ */
+int ext4_get_projid(struct inode *inode, kprojid_t *projid)
+{
+	struct super_block *sb = inode->i_sb;
+
+	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT)) {
+		*projid = EXT4_I(inode)->i_projid;
+		return 0;
+	}
+
+	return -EOPNOTSUPP;
+}
+
struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
{
struct ext4_iloc iloc;
@@ -4115,6 +4244,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
int block;
uid_t i_uid;
gid_t i_gid;
+ projid_t i_projid;
inode = iget_locked(sb, ino);
if (!inode)
@@ -4164,12 +4294,20 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
inode->i_mode = le16_to_cpu(raw_inode->i_mode);
i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ EXT4_INODE_SIZE(sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+ EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+ i_projid = (projid_t)le32_to_cpu(raw_inode->i_projid);
+ else
+ i_projid = EXT4_DEF_PROJID;
+
if (!(test_opt(inode->i_sb, NO_UID32))) {
i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
}
i_uid_write(inode, i_uid);
i_gid_write(inode, i_gid);
+ ei->i_projid = make_kprojid(&init_user_ns, i_projid);
set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
@@ -4311,6 +4449,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
inode->i_op = &ext4_symlink_inode_operations;
ext4_set_aops(inode);
}
+ inode_nohighmem(inode);
} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
inode->i_op = &ext4_special_inode_operations;
@@ -4467,6 +4606,7 @@ static int ext4_do_update_inode(handle_t *handle,
int need_datasync = 0, set_large_file = 0;
uid_t i_uid;
gid_t i_gid;
+ projid_t i_projid;
spin_lock(&ei->i_raw_lock);
@@ -4479,6 +4619,7 @@ static int ext4_do_update_inode(handle_t *handle,
raw_inode->i_mode = cpu_to_le16(inode->i_mode);
i_uid = i_uid_read(inode);
i_gid = i_gid_read(inode);
+ i_projid = from_kprojid(&init_user_ns, ei->i_projid);
if (!(test_opt(inode->i_sb, NO_UID32))) {
raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
@@ -4556,6 +4697,15 @@ static int ext4_do_update_inode(handle_t *handle,
cpu_to_le16(ei->i_extra_isize);
}
}
+
+ BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+ EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ i_projid != EXT4_DEF_PROJID);
+
+ if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+ EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+ raw_inode->i_projid = cpu_to_le32(i_projid);
+
ext4_inode_csum_set(inode, raw_inode, ei);
spin_unlock(&ei->i_raw_lock);
if (inode->i_sb->s_flags & MS_LAZYTIME)
@@ -4851,6 +5001,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
} else
ext4_wait_for_tail_page_commit(inode);
}
+ down_write(&EXT4_I(inode)->i_mmap_sem);
/*
* Truncate pagecache after we've waited for commit
* in data=journal mode to make pages freeable.
@@ -4858,6 +5009,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
truncate_pagecache(inode, inode->i_size);
if (shrink)
ext4_truncate(inode);
+ up_write(&EXT4_I(inode)->i_mmap_sem);
}
if (!rc) {
@@ -5306,6 +5458,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
+
+ down_read(&EXT4_I(inode)->i_mmap_sem);
/* Delalloc case is easy... */
if (test_opt(inode->i_sb, DELALLOC) &&
!ext4_should_journal_data(inode) &&
@@ -5375,6 +5529,19 @@ retry_alloc:
out_ret:
ret = block_page_mkwrite_return(ret);
out:
+ up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(inode->i_sb);
return ret;
}
+
+/*
+ * Wrapper around filemap_fault() that holds the inode's i_mmap_sem for
+ * reading for the duration of the call and returns its result unchanged.
+ */
+int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct ext4_inode_info *ei = EXT4_I(file_inode(vma->vm_file));
+	int ret;
+
+	down_read(&ei->i_mmap_sem);
+	ret = filemap_fault(vma, vmf);
+	up_read(&ei->i_mmap_sem);
+
+	return ret;
+}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 5e872fd40..eae5917c5 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -14,6 +14,7 @@
#include <linux/mount.h>
#include <linux/file.h>
#include <linux/random.h>
+#include <linux/quotaops.h>
#include <asm/uaccess.h>
#include "ext4_jbd2.h"
#include "ext4.h"
@@ -202,6 +203,238 @@ static int uuid_is_zero(__u8 u[16])
return 1;
}
+/*
+ * Apply a new set of ext4 inode flags (@flags) to @inode.
+ *
+ * Returns 0 on success.  Returns -EPERM for quota files or when the caller
+ * lacks the capability needed to change APPEND/IMMUTABLE or JOURNAL_DATA,
+ * -EOPNOTSUPP when trying to add EOFBLOCKS, or the error of a failing
+ * journal/migration helper.
+ *
+ * Both callers visible in this patch invoke it under inode_lock().
+ */
+static int ext4_ioctl_setflags(struct inode *inode,
+ unsigned int flags)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ handle_t *handle = NULL;
+ int err = -EPERM, migrate = 0;
+ struct ext4_iloc iloc;
+ unsigned int oldflags, mask, i;
+ unsigned int jflag;
+
+ /* Is it quota file? Do not allow user to mess with it */
+ if (IS_NOQUOTA(inode))
+ goto flags_out;
+
+ oldflags = ei->i_flags;
+
+ /* The JOURNAL_DATA flag is modifiable only by root */
+ jflag = flags & EXT4_JOURNAL_DATA_FL;
+
+ /*
+ * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+ * the relevant capability.
+ *
+ * This test looks nicer. Thanks to Pauline Middelink
+ */
+ if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
+ if (!capable(CAP_LINUX_IMMUTABLE))
+ goto flags_out;
+ }
+
+ /*
+ * The JOURNAL_DATA flag can only be changed by
+ * the relevant capability.
+ */
+ if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
+ if (!capable(CAP_SYS_RESOURCE))
+ goto flags_out;
+ }
+ /* A change of the EXTENTS flag is carried out by the migration step below */
+ if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
+ migrate = 1;
+
+ if (flags & EXT4_EOFBLOCKS_FL) {
+ /* we don't support adding EOFBLOCKS flag */
+ if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
+ err = -EOPNOTSUPP;
+ goto flags_out;
+ }
+ } else if (oldflags & EXT4_EOFBLOCKS_FL)
+ ext4_truncate(inode);
+
+ handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto flags_out;
+ }
+ if (IS_SYNC(inode))
+ ext4_handle_sync(handle);
+ err = ext4_reserve_inode_write(handle, inode, &iloc);
+ if (err)
+ goto flags_err;
+
+ /* Copy each bit in EXT4_FL_USER_MODIFIABLE from @flags; leave the rest alone */
+ for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
+ if (!(mask & EXT4_FL_USER_MODIFIABLE))
+ continue;
+ if (mask & flags)
+ ext4_set_inode_flag(inode, i);
+ else
+ ext4_clear_inode_flag(inode, i);
+ }
+
+ ext4_set_inode_flags(inode);
+ inode->i_ctime = ext4_current_time(inode);
+
+ err = ext4_mark_iloc_dirty(handle, inode, &iloc);
+flags_err:
+ ext4_journal_stop(handle);
+ if (err)
+ goto flags_out;
+
+ /* Journal-data switch and format migration run after the handle is stopped */
+ if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
+ err = ext4_change_inode_journal_flag(inode, jflag);
+ if (err)
+ goto flags_out;
+ if (migrate) {
+ if (flags & EXT4_EXTENTS_FL)
+ err = ext4_ext_migrate(inode);
+ else
+ err = ext4_ind_migrate(inode);
+ }
+
+flags_out:
+ return err;
+}
+
+#ifdef CONFIG_QUOTA
+/*
+ * Set the project ID of the inode behind @filp to @projid.
+ *
+ * Returns 0 on success or when nothing has to change, -EOPNOTSUPP when the
+ * filesystem cannot store a (non-default) project ID, -EPERM for quota
+ * files, -EOVERFLOW when i_projid does not fit in the on-disk inode, or the
+ * error reported by a failing helper.
+ */
+static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
+{
+	struct inode *inode = file_inode(filp);
+	struct super_block *sb = inode->i_sb;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	int err, rc;
+	handle_t *handle;
+	kprojid_t kprojid;
+	struct ext4_iloc iloc;
+	struct ext4_inode *raw_inode;
+
+	/* Without the PROJECT feature only the default project ID is accepted */
+	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
+			EXT4_FEATURE_RO_COMPAT_PROJECT)) {
+		if (projid != EXT4_DEF_PROJID)
+			return -EOPNOTSUPP;
+		else
+			return 0;
+	}
+
+	if (EXT4_INODE_SIZE(sb) <= EXT4_GOOD_OLD_INODE_SIZE)
+		return -EOPNOTSUPP;
+
+	kprojid = make_kprojid(&init_user_ns, (projid_t)projid);
+
+	if (projid_eq(kprojid, EXT4_I(inode)->i_projid))
+		return 0;
+
+	err = mnt_want_write_file(filp);
+	if (err)
+		return err;
+
+	err = -EPERM;
+	inode_lock(inode);
+	/* Is it quota file? Do not allow user to mess with it */
+	if (IS_NOQUOTA(inode))
+		goto out_unlock;
+
+	err = ext4_get_inode_loc(inode, &iloc);
+	if (err)
+		goto out_unlock;
+
+	raw_inode = ext4_raw_inode(&iloc);
+	if (!EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) {
+		err = -EOVERFLOW;
+		brelse(iloc.bh);
+		goto out_unlock;
+	}
+	brelse(iloc.bh);
+
+	/* Do not start the transfer if the inode's dquots cannot be set up */
+	err = dquot_initialize(inode);
+	if (err)
+		goto out_unlock;
+
+	handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
+		EXT4_QUOTA_INIT_BLOCKS(sb) +
+		EXT4_QUOTA_DEL_BLOCKS(sb) + 3);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		goto out_unlock;
+	}
+
+	err = ext4_reserve_inode_write(handle, inode, &iloc);
+	if (err)
+		goto out_stop;
+
+	if (sb_has_quota_limits_enabled(sb, PRJQUOTA)) {
+		struct dquot *transfer_to[MAXQUOTAS] = { };
+
+		transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
+		/*
+		 * Only a usable dquot may be handed to __dquot_transfer() and
+		 * dqput(); a failed lookup (NULL or ERR_PTR) skips the
+		 * transfer.
+		 */
+		if (!IS_ERR_OR_NULL(transfer_to[PRJQUOTA])) {
+			err = __dquot_transfer(inode, transfer_to);
+			dqput(transfer_to[PRJQUOTA]);
+			if (err)
+				goto out_dirty;
+		}
+	}
+	EXT4_I(inode)->i_projid = kprojid;
+	inode->i_ctime = ext4_current_time(inode);
+out_dirty:
+	/* Always release iloc via mark_iloc_dirty; keep the first error seen */
+	rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
+	if (!err)
+		err = rc;
+out_stop:
+	ext4_journal_stop(handle);
+out_unlock:
+	inode_unlock(inode);
+	mnt_drop_write_file(filp);
+	return err;
+}
+#else
+/* Built without CONFIG_QUOTA: only the default project ID is accepted. */
+static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
+{
+	return projid == EXT4_DEF_PROJID ? 0 : -EOPNOTSUPP;
+}
+#endif
+
+/* Translate ext4 inode flag bits (EXT4_*_FL) into their FS_XFLAG_* equivalents */
+static inline __u32 ext4_iflags_to_xflags(unsigned long iflags)
+{
+	__u32 out = 0;
+
+	out |= (iflags & EXT4_SYNC_FL) ? FS_XFLAG_SYNC : 0;
+	out |= (iflags & EXT4_IMMUTABLE_FL) ? FS_XFLAG_IMMUTABLE : 0;
+	out |= (iflags & EXT4_APPEND_FL) ? FS_XFLAG_APPEND : 0;
+	out |= (iflags & EXT4_NODUMP_FL) ? FS_XFLAG_NODUMP : 0;
+	out |= (iflags & EXT4_NOATIME_FL) ? FS_XFLAG_NOATIME : 0;
+	out |= (iflags & EXT4_PROJINHERIT_FL) ? FS_XFLAG_PROJINHERIT : 0;
+
+	return out;
+}
+
+/* Translate FS_XFLAG_* bits into their ext4 inode flag (EXT4_*_FL) equivalents */
+static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
+{
+	unsigned long out = 0;
+
+	out |= (xflags & FS_XFLAG_SYNC) ? EXT4_SYNC_FL : 0;
+	out |= (xflags & FS_XFLAG_IMMUTABLE) ? EXT4_IMMUTABLE_FL : 0;
+	out |= (xflags & FS_XFLAG_APPEND) ? EXT4_APPEND_FL : 0;
+	out |= (xflags & FS_XFLAG_NODUMP) ? EXT4_NODUMP_FL : 0;
+	out |= (xflags & FS_XFLAG_NOATIME) ? EXT4_NOATIME_FL : 0;
+	out |= (xflags & FS_XFLAG_PROJINHERIT) ? EXT4_PROJINHERIT_FL : 0;
+
+	return out;
+}
+
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -217,11 +450,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
return put_user(flags, (int __user *) arg);
case EXT4_IOC_SETFLAGS: {
- handle_t *handle = NULL;
- int err, migrate = 0;
- struct ext4_iloc iloc;
- unsigned int oldflags, mask, i;
- unsigned int jflag;
+ int err;
if (!inode_owner_or_capable(inode))
return -EACCES;
@@ -235,90 +464,9 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
flags = ext4_mask_flags(inode->i_mode, flags);
- err = -EPERM;
- mutex_lock(&inode->i_mutex);
- /* Is it quota file? Do not allow user to mess with it */
- if (IS_NOQUOTA(inode))
- goto flags_out;
-
- oldflags = ei->i_flags;
-
- /* The JOURNAL_DATA flag is modifiable only by root */
- jflag = flags & EXT4_JOURNAL_DATA_FL;
-
- /*
- * The IMMUTABLE and APPEND_ONLY flags can only be changed by
- * the relevant capability.
- *
- * This test looks nicer. Thanks to Pauline Middelink
- */
- if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
- if (!capable(CAP_LINUX_IMMUTABLE))
- goto flags_out;
- }
-
- /*
- * The JOURNAL_DATA flag can only be changed by
- * the relevant capability.
- */
- if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
- if (!capable(CAP_SYS_RESOURCE))
- goto flags_out;
- }
- if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
- migrate = 1;
-
- if (flags & EXT4_EOFBLOCKS_FL) {
- /* we don't support adding EOFBLOCKS flag */
- if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
- err = -EOPNOTSUPP;
- goto flags_out;
- }
- } else if (oldflags & EXT4_EOFBLOCKS_FL)
- ext4_truncate(inode);
-
- handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
- if (IS_ERR(handle)) {
- err = PTR_ERR(handle);
- goto flags_out;
- }
- if (IS_SYNC(inode))
- ext4_handle_sync(handle);
- err = ext4_reserve_inode_write(handle, inode, &iloc);
- if (err)
- goto flags_err;
-
- for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
- if (!(mask & EXT4_FL_USER_MODIFIABLE))
- continue;
- if (mask & flags)
- ext4_set_inode_flag(inode, i);
- else
- ext4_clear_inode_flag(inode, i);
- }
-
- ext4_set_inode_flags(inode);
- inode->i_ctime = ext4_current_time(inode);
-
- err = ext4_mark_iloc_dirty(handle, inode, &iloc);
-flags_err:
- ext4_journal_stop(handle);
- if (err)
- goto flags_out;
-
- if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
- err = ext4_change_inode_journal_flag(inode, jflag);
- if (err)
- goto flags_out;
- if (migrate) {
- if (flags & EXT4_EXTENTS_FL)
- err = ext4_ext_migrate(inode);
- else
- err = ext4_ind_migrate(inode);
- }
-
-flags_out:
- mutex_unlock(&inode->i_mutex);
+ inode_lock(inode);
+ err = ext4_ioctl_setflags(inode, flags);
+ inode_unlock(inode);
mnt_drop_write_file(filp);
return err;
}
@@ -349,7 +497,7 @@ flags_out:
goto setversion_out;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
@@ -364,7 +512,7 @@ flags_out:
ext4_journal_stop(handle);
unlock_out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
setversion_out:
mnt_drop_write_file(filp);
return err;
@@ -435,6 +583,11 @@ group_extend_out:
"Online defrag not supported with bigalloc");
err = -EOPNOTSUPP;
goto mext_out;
+ } else if (IS_DAX(inode)) {
+ ext4_msg(sb, KERN_ERR,
+ "Online defrag not supported with DAX");
+ err = -EOPNOTSUPP;
+ goto mext_out;
}
err = mnt_want_write_file(filp);
@@ -510,9 +663,9 @@ group_add_out:
* ext4_ext_swap_inode_data before we switch the
* inode format to prevent read.
*/
- mutex_lock(&(inode->i_mutex));
+ inode_lock((inode));
err = ext4_ext_migrate(inode);
- mutex_unlock(&(inode->i_mutex));
+ inode_unlock((inode));
mnt_drop_write_file(filp);
return err;
}
@@ -689,6 +842,60 @@ encryption_policy_out:
return -EOPNOTSUPP;
#endif
}
+ case EXT4_IOC_FSGETXATTR:
+ {
+ struct fsxattr fa;
+
+ memset(&fa, 0, sizeof(struct fsxattr));
+ ext4_get_inode_flags(ei);
+ fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE);
+
+ if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+ EXT4_FEATURE_RO_COMPAT_PROJECT)) {
+ fa.fsx_projid = (__u32)from_kprojid(&init_user_ns,
+ EXT4_I(inode)->i_projid);
+ }
+
+ if (copy_to_user((struct fsxattr __user *)arg,
+ &fa, sizeof(fa)))
+ return -EFAULT;
+ return 0;
+ }
+ case EXT4_IOC_FSSETXATTR:
+ {
+ struct fsxattr fa;
+ int err;
+
+ if (copy_from_user(&fa, (struct fsxattr __user *)arg,
+ sizeof(fa)))
+ return -EFAULT;
+
+ /* Make sure caller has proper permission */
+ if (!inode_owner_or_capable(inode))
+ return -EACCES;
+
+ err = mnt_want_write_file(filp);
+ if (err)
+ return err;
+
+ flags = ext4_xflags_to_iflags(fa.fsx_xflags);
+ flags = ext4_mask_flags(inode->i_mode, flags);
+
+ inode_lock(inode);
+ flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) |
+ (flags & EXT4_FL_XFLAG_VISIBLE);
+ err = ext4_ioctl_setflags(inode, flags);
+ inode_unlock(inode);
+ mnt_drop_write_file(filp);
+ if (err)
+ return err;
+
+ err = ext4_ioctl_setproject(filp, fa.fsx_projid);
+ if (err)
+ return err;
+
+ return 0;
+ }
default:
return -ENOTTY;
}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 61eaf74dc..4424b7bf8 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2285,7 +2285,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
if (group == 0)
seq_puts(seq, "#group: free frags first ["
" 2^0 2^1 2^2 2^3 2^4 2^5 2^6 "
- " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]");
+ " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n");
i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
sizeof(struct ext4_group_info);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index e032a0423..4098acc70 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -390,6 +390,7 @@ data_copy:
*err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0);
if (*err < 0)
break;
+ bh = bh->b_this_page;
}
if (!*err)
*err = block_commit_write(pagep[0], from, from + replaced_size);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a969ab39f..48e4b8907 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -273,7 +273,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
struct ext4_filename *fname,
struct ext4_dir_entry_2 **res_dir);
static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
- struct dentry *dentry, struct inode *inode);
+ struct inode *dir, struct inode *inode);
/* checksumming functions */
void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
@@ -1558,6 +1558,24 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
struct ext4_dir_entry_2 *de;
struct buffer_head *bh;
+ if (ext4_encrypted_inode(dir)) {
+ int res = ext4_get_encryption_info(dir);
+
+ /*
+ * This should be a properly defined flag for
+ * dentry->d_flags when we uplift this to the VFS.
+ * d_fsdata is set to (void *) 1 if if the dentry is
+ * created while the directory was encrypted and we
+ * don't have access to the key.
+ */
+ dentry->d_fsdata = NULL;
+ if (ext4_encryption_info(dir))
+ dentry->d_fsdata = (void *) 1;
+ d_set_d_op(dentry, &ext4_encrypted_d_ops);
+ if (res && res != -ENOKEY)
+ return ERR_PTR(res);
+ }
+
if (dentry->d_name.len > EXT4_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
@@ -1585,11 +1603,15 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
return ERR_PTR(-EFSCORRUPTED);
}
if (!IS_ERR(inode) && ext4_encrypted_inode(dir) &&
- (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
- S_ISLNK(inode->i_mode)) &&
+ (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
!ext4_is_child_context_consistent_with_parent(dir,
inode)) {
+ int nokey = ext4_encrypted_inode(inode) &&
+ !ext4_encryption_info(inode);
+
iput(inode);
+ if (nokey)
+ return ERR_PTR(-ENOKEY);
ext4_warning(inode->i_sb,
"Inconsistent encryption contexts: %lu/%lu\n",
(unsigned long) dir->i_ino,
@@ -1928,10 +1950,9 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
* directory, and adds the dentry to the indexed directory.
*/
static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
- struct dentry *dentry,
+ struct inode *dir,
struct inode *inode, struct buffer_head *bh)
{
- struct inode *dir = d_inode(dentry->d_parent);
struct buffer_head *bh2;
struct dx_root *root;
struct dx_frame frames[2], *frame;
@@ -2086,8 +2107,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
return retval;
if (ext4_has_inline_data(dir)) {
- retval = ext4_try_add_inline_entry(handle, &fname,
- dentry, inode);
+ retval = ext4_try_add_inline_entry(handle, &fname, dir, inode);
if (retval < 0)
goto out;
if (retval == 1) {
@@ -2097,7 +2117,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
}
if (is_dx(dir)) {
- retval = ext4_dx_add_entry(handle, &fname, dentry, inode);
+ retval = ext4_dx_add_entry(handle, &fname, dir, inode);
if (!retval || (retval != ERR_BAD_DX_DIR))
goto out;
ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
@@ -2119,7 +2139,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
if (blocks == 1 && !dx_fallback &&
ext4_has_feature_dir_index(sb)) {
- retval = make_indexed_dir(handle, &fname, dentry,
+ retval = make_indexed_dir(handle, &fname, dir,
inode, bh);
bh = NULL; /* make_indexed_dir releases bh */
goto out;
@@ -2154,12 +2174,11 @@ out:
* Returns 0 for success, or a negative error value
*/
static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
- struct dentry *dentry, struct inode *inode)
+ struct inode *dir, struct inode *inode)
{
struct dx_frame frames[2], *frame;
struct dx_entry *entries, *at;
struct buffer_head *bh;
- struct inode *dir = d_inode(dentry->d_parent);
struct super_block *sb = dir->i_sb;
struct ext4_dir_entry_2 *de;
int err;
@@ -2756,7 +2775,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
return 0;
WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
- !mutex_is_locked(&inode->i_mutex));
+ !inode_is_locked(inode));
/*
* Exit early if inode already is on orphan list. This is a big speedup
* since we don't have to contend on the global s_orphan_lock.
@@ -2838,7 +2857,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
return 0;
WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
- !mutex_is_locked(&inode->i_mutex));
+ !inode_is_locked(inode));
/* Do this quick check before taking global s_orphan_lock. */
if (list_empty(&ei->i_orphan))
return 0;
@@ -3132,6 +3151,7 @@ static int ext4_symlink(struct inode *dir,
if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
if (!encryption_required)
inode->i_op = &ext4_symlink_inode_operations;
+ inode_nohighmem(inode);
ext4_set_aops(inode);
/*
* We cannot call page_symlink() with transaction started
@@ -3211,6 +3231,12 @@ static int ext4_link(struct dentry *old_dentry,
if (ext4_encrypted_inode(dir) &&
!ext4_is_child_context_consistent_with_parent(dir, inode))
return -EPERM;
+
+ if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
+ (!projid_eq(EXT4_I(dir)->i_projid,
+ EXT4_I(old_dentry->d_inode)->i_projid)))
+ return -EXDEV;
+
err = dquot_initialize(dir);
if (err)
return err;
@@ -3491,6 +3517,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
int credits;
u8 old_file_type;
+ if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) &&
+ (!projid_eq(EXT4_I(new_dir)->i_projid,
+ EXT4_I(old_dentry->d_inode)->i_projid)))
+ return -EXDEV;
+
retval = dquot_initialize(old.dir);
if (retval)
return retval;
@@ -3700,6 +3731,14 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
new.inode)))
return -EPERM;
+ if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
+ !projid_eq(EXT4_I(new_dir)->i_projid,
+ EXT4_I(old_dentry->d_inode)->i_projid)) ||
+ (ext4_test_inode_flag(old_dir, EXT4_INODE_PROJINHERIT) &&
+ !projid_eq(EXT4_I(old_dir)->i_projid,
+ EXT4_I(new_dentry->d_inode)->i_projid)))
+ return -EXDEV;
+
retval = dquot_initialize(old.dir);
if (retval)
return retval;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 17fbe3882..090b34986 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -52,9 +52,8 @@ void ext4_exit_pageio(void)
*/
static void buffer_io_error(struct buffer_head *bh)
{
- char b[BDEVNAME_SIZE];
- printk_ratelimited(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
- bdevname(bh->b_bdev, b),
+ printk_ratelimited(KERN_ERR "Buffer I/O error on device %pg, logical block %llu\n",
+ bh->b_bdev,
(unsigned long long)bh->b_blocknr);
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c9ab67da6..3ed01ec01 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -80,6 +80,36 @@ static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
+/*
+ * Lock ordering
+ *
+ * Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
+ * i_mmap_rwsem (inode->i_mmap_rwsem)!
+ *
+ * page fault path:
+ * mmap_sem -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
+ * page lock -> i_data_sem (rw)
+ *
+ * buffered write path:
+ * sb_start_write -> i_mutex -> mmap_sem
+ * sb_start_write -> i_mutex -> transaction start -> page lock ->
+ * i_data_sem (rw)
+ *
+ * truncate:
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
+ * i_mmap_rwsem (w) -> page lock
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
+ * transaction start -> i_data_sem (rw)
+ *
+ * direct IO:
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> mmap_sem
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) ->
+ * transaction start -> i_data_sem (rw)
+ *
+ * writepages:
+ * transaction start -> page lock(s) -> i_data_sem (rw)
+ */
+
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
static struct file_system_type ext2_fs_type = {
.owner = THIS_MODULE,
@@ -958,6 +988,7 @@ static void init_once(void *foo)
INIT_LIST_HEAD(&ei->i_orphan);
init_rwsem(&ei->xattr_sem);
init_rwsem(&ei->i_data_sem);
+ init_rwsem(&ei->i_mmap_sem);
inode_init_once(&ei->vfs_inode);
}
@@ -966,7 +997,7 @@ static int __init init_inodecache(void)
ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
sizeof(struct ext4_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
if (ext4_inode_cachep == NULL)
return -ENOMEM;
@@ -1066,8 +1097,8 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
}
#ifdef CONFIG_QUOTA
-#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
-#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
+static char *quotatypes[] = INITQFNAMES;
+#define QTYPE2NAME(t) (quotatypes[t])
static int ext4_write_dquot(struct dquot *dquot);
static int ext4_acquire_dquot(struct dquot *dquot);
@@ -1100,6 +1131,7 @@ static const struct dquot_operations ext4_quota_operations = {
.write_info = ext4_write_info,
.alloc_dquot = dquot_alloc,
.destroy_dquot = dquot_destroy,
+ .get_projid = ext4_get_projid,
};
static const struct quotactl_ops ext4_qctl_operations = {
@@ -2254,10 +2286,10 @@ static void ext4_orphan_cleanup(struct super_block *sb,
__func__, inode->i_ino, inode->i_size);
jbd_debug(2, "truncating inode %lu to %lld bytes\n",
inode->i_ino, inode->i_size);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
truncate_inode_pages(inode->i_mapping, inode->i_size);
ext4_truncate(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
nr_truncates++;
} else {
if (test_opt(sb, DEBUG))
@@ -2526,6 +2558,12 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
"without CONFIG_QUOTA");
return 0;
}
+ if (ext4_has_feature_project(sb) && !readonly) {
+ ext4_msg(sb, KERN_ERR,
+ "Filesystem with project quota feature cannot be mounted RDWR "
+ "without CONFIG_QUOTA");
+ return 0;
+ }
#endif /* CONFIG_QUOTA */
return 1;
}
@@ -3654,7 +3692,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sb->s_qcop = &dquot_quotactl_sysfile_ops;
else
sb->s_qcop = &ext4_qctl_operations;
- sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
+ sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
@@ -4790,6 +4828,48 @@ restore_opts:
return err;
}
+#ifdef CONFIG_QUOTA
+static int ext4_statfs_project(struct super_block *sb,
+ kprojid_t projid, struct kstatfs *buf)
+{
+ struct kqid qid;
+ struct dquot *dquot;
+ u64 limit;
+ u64 curblock;
+
+ qid = make_kqid_projid(projid);
+ dquot = dqget(sb, qid);
+ if (IS_ERR(dquot))
+ return PTR_ERR(dquot);
+ spin_lock(&dq_data_lock);
+
+ limit = (dquot->dq_dqb.dqb_bsoftlimit ?
+ dquot->dq_dqb.dqb_bsoftlimit :
+ dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
+ if (limit && buf->f_blocks > limit) {
+ curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
+ buf->f_blocks = limit;
+ buf->f_bfree = buf->f_bavail =
+ (buf->f_blocks > curblock) ?
+ (buf->f_blocks - curblock) : 0;
+ }
+
+ limit = dquot->dq_dqb.dqb_isoftlimit ?
+ dquot->dq_dqb.dqb_isoftlimit :
+ dquot->dq_dqb.dqb_ihardlimit;
+ if (limit && buf->f_files > limit) {
+ buf->f_files = limit;
+ buf->f_ffree =
+ (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
+ (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
+ }
+
+ spin_unlock(&dq_data_lock);
+ dqput(dquot);
+ return 0;
+}
+#endif
+
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
@@ -4822,6 +4902,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
+#ifdef CONFIG_QUOTA
+ if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
+ sb_has_quota_limits_enabled(sb, PRJQUOTA))
+ ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
+#endif
return 0;
}
@@ -4986,7 +5071,8 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
struct inode *qf_inode;
unsigned long qf_inums[EXT4_MAXQUOTAS] = {
le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
- le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
};
BUG_ON(!ext4_has_feature_quota(sb));
@@ -5014,7 +5100,8 @@ static int ext4_enable_quotas(struct super_block *sb)
int type, err = 0;
unsigned long qf_inums[EXT4_MAXQUOTAS] = {
le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
- le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
};
sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index e8e7af62a..6f7ee30a8 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -23,17 +23,21 @@
#include "xattr.h"
#ifdef CONFIG_EXT4_FS_ENCRYPTION
-static const char *ext4_encrypted_follow_link(struct dentry *dentry, void **cookie)
+static const char *ext4_encrypted_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
struct page *cpage = NULL;
char *caddr, *paddr = NULL;
struct ext4_str cstr, pstr;
- struct inode *inode = d_inode(dentry);
struct ext4_encrypted_symlink_data *sd;
loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
int res;
u32 plen, max_size = inode->i_sb->s_blocksize;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
res = ext4_get_encryption_info(inode);
if (res)
return ERR_PTR(res);
@@ -45,7 +49,7 @@ static const char *ext4_encrypted_follow_link(struct dentry *dentry, void **cook
cpage = read_mapping_page(inode->i_mapping, 0, NULL);
if (IS_ERR(cpage))
return ERR_CAST(cpage);
- caddr = kmap(cpage);
+ caddr = page_address(cpage);
caddr[size] = 0;
}
@@ -75,24 +79,20 @@ static const char *ext4_encrypted_follow_link(struct dentry *dentry, void **cook
/* Null-terminate the name */
if (res <= plen)
paddr[res] = '\0';
- if (cpage) {
- kunmap(cpage);
+ if (cpage)
page_cache_release(cpage);
- }
- return *cookie = paddr;
+ set_delayed_call(done, kfree_link, paddr);
+ return paddr;
errout:
- if (cpage) {
- kunmap(cpage);
+ if (cpage)
page_cache_release(cpage);
- }
kfree(paddr);
return ERR_PTR(res);
}
const struct inode_operations ext4_encrypted_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = ext4_encrypted_follow_link,
- .put_link = kfree_put_link,
+ .get_link = ext4_encrypted_get_link,
.setattr = ext4_setattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
@@ -103,8 +103,7 @@ const struct inode_operations ext4_encrypted_symlink_inode_operations = {
const struct inode_operations ext4_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.setattr = ext4_setattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
@@ -114,7 +113,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
const struct inode_operations ext4_fast_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
.setattr = ext4_setattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h
index 011ba6670..c70d06a38 100644
--- a/fs/ext4/truncate.h
+++ b/fs/ext4/truncate.h
@@ -10,8 +10,10 @@
*/
static inline void ext4_truncate_failed_write(struct inode *inode)
{
+ down_write(&EXT4_I(inode)->i_mmap_sem);
truncate_inode_pages(inode->i_mapping, inode->i_size);
ext4_truncate(inode);
+ up_write(&EXT4_I(inode)->i_mmap_sem);
}
/*
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 6b6b3e751..a95151e87 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -68,10 +68,8 @@
printk("\n"); \
} while (0)
# define ea_bdebug(bh, f...) do { \
- char b[BDEVNAME_SIZE]; \
- printk(KERN_DEBUG "block %s:%lu: ", \
- bdevname(bh->b_bdev, b), \
- (unsigned long) bh->b_blocknr); \
+ printk(KERN_DEBUG "block %pg:%lu: ", \
+ bh->b_bdev, (unsigned long) bh->b_blocknr); \
printk(f); \
printk("\n"); \
} while (0)
@@ -404,19 +402,24 @@ ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
const struct xattr_handler *handler =
ext4_xattr_handler(entry->e_name_index);
- if (handler) {
- size_t size = handler->list(handler, dentry, buffer,
- rest, entry->e_name,
- entry->e_name_len);
+ if (handler && (!handler->list || handler->list(dentry))) {
+ const char *prefix = handler->prefix ?: handler->name;
+ size_t prefix_len = strlen(prefix);
+ size_t size = prefix_len + entry->e_name_len + 1;
+
if (buffer) {
if (size > rest)
return -ERANGE;
- buffer += size;
+ memcpy(buffer, prefix, prefix_len);
+ buffer += prefix_len;
+ memcpy(buffer, entry->e_name, entry->e_name_len);
+ buffer += entry->e_name_len;
+ *buffer++ = 0;
}
rest -= size;
}
}
- return buffer_size - rest;
+ return buffer_size - rest; /* total size */
}
static int
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index 36f4c1a84..3e81bdca0 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -11,30 +11,11 @@
#include "ext4.h"
#include "xattr.h"
-static size_t
-ext4_xattr_security_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
-{
- const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
- const size_t total_len = prefix_len + name_len + 1;
-
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
-}
-
static int
ext4_xattr_security_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_SECURITY,
name, buffer, size);
}
@@ -44,8 +25,6 @@ ext4_xattr_security_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext4_xattr_set(d_inode(dentry), EXT4_XATTR_INDEX_SECURITY,
name, value, size, flags);
}
@@ -79,7 +58,6 @@ ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
const struct xattr_handler ext4_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
- .list = ext4_xattr_security_list,
.get = ext4_xattr_security_get,
.set = ext4_xattr_security_set,
};
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index 488089053..2a3c6f9b8 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -12,23 +12,10 @@
#include "ext4.h"
#include "xattr.h"
-static size_t
-ext4_xattr_trusted_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
+static bool
+ext4_xattr_trusted_list(struct dentry *dentry)
{
- const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (!capable(CAP_SYS_ADMIN))
- return 0;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
+ return capable(CAP_SYS_ADMIN);
}
static int
@@ -36,8 +23,6 @@ ext4_xattr_trusted_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name, void *buffer,
size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_TRUSTED,
name, buffer, size);
}
@@ -47,8 +32,6 @@ ext4_xattr_trusted_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext4_xattr_set(d_inode(dentry), EXT4_XATTR_INDEX_TRUSTED,
name, value, size, flags);
}
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index d2dec3364..d152f431e 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -11,23 +11,10 @@
#include "ext4.h"
#include "xattr.h"
-static size_t
-ext4_xattr_user_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
+static bool
+ext4_xattr_user_list(struct dentry *dentry)
{
- const size_t prefix_len = XATTR_USER_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (!test_opt(dentry->d_sb, XATTR_USER))
- return 0;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_USER_PREFIX, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
+ return test_opt(dentry->d_sb, XATTR_USER);
}
static int
@@ -35,8 +22,6 @@ ext4_xattr_user_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
if (!test_opt(dentry->d_sb, XATTR_USER))
return -EOPNOTSUPP;
return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_USER,
@@ -48,8 +33,6 @@ ext4_xattr_user_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
if (!test_opt(dentry->d_sb, XATTR_USER))
return -EOPNOTSUPP;
return ext4_xattr_set(d_inode(dentry), EXT4_XATTR_INDEX_USER,
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f661d8047..3842af954 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -237,7 +237,7 @@ static int f2fs_write_meta_page(struct page *page,
dec_page_count(sbi, F2FS_DIRTY_META);
unlock_page(page);
- if (wbc->for_reclaim)
+ if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
f2fs_submit_merged_bio(sbi, META, WRITE);
return 0;
@@ -410,13 +410,13 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
spin_unlock(&im->ino_lock);
}
-void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
+void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
/* add new dirty ino entry into list */
__add_ino_entry(sbi, ino, type);
}
-void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
+void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
/* remove dirty ino entry from list */
__remove_ino_entry(sbi, ino, type);
@@ -434,7 +434,7 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
return e ? true : false;
}
-void release_dirty_inode(struct f2fs_sb_info *sbi)
+void release_ino_entry(struct f2fs_sb_info *sbi)
{
struct ino_entry *e, *tmp;
int i;
@@ -722,47 +722,48 @@ fail_no_cp:
return -EINVAL;
}
-static int __add_dirty_inode(struct inode *inode, struct inode_entry *new)
+static void __add_dirty_inode(struct inode *inode, enum inode_type type)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
- if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR))
- return -EEXIST;
+ if (is_inode_flag_set(fi, flag))
+ return;
- set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
- F2FS_I(inode)->dirty_dir = new;
- list_add_tail(&new->list, &sbi->dir_inode_list);
- stat_inc_dirty_dir(sbi);
- return 0;
+ set_inode_flag(fi, flag);
+ list_add_tail(&fi->dirty_list, &sbi->inode_list[type]);
+ stat_inc_dirty_inode(sbi, type);
+}
+
+static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
+{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
+
+ if (get_dirty_pages(inode) ||
+ !is_inode_flag_set(F2FS_I(inode), flag))
+ return;
+
+ list_del_init(&fi->dirty_list);
+ clear_inode_flag(fi, flag);
+ stat_dec_dirty_inode(F2FS_I_SB(inode), type);
}
void update_dirty_page(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct inode_entry *new;
- int ret = 0;
+ enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
!S_ISLNK(inode->i_mode))
return;
- if (!S_ISDIR(inode->i_mode)) {
- inode_inc_dirty_pages(inode);
- goto out;
- }
-
- new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
- new->inode = inode;
- INIT_LIST_HEAD(&new->list);
-
- spin_lock(&sbi->dir_inode_lock);
- ret = __add_dirty_inode(inode, new);
+ spin_lock(&sbi->inode_lock[type]);
+ __add_dirty_inode(inode, type);
inode_inc_dirty_pages(inode);
- spin_unlock(&sbi->dir_inode_lock);
+ spin_unlock(&sbi->inode_lock[type]);
- if (ret)
- kmem_cache_free(inode_entry_slab, new);
-out:
SetPagePrivate(page);
f2fs_trace_pid(page);
}
@@ -770,70 +771,60 @@ out:
void add_dirty_dir_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct inode_entry *new =
- f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
- int ret = 0;
-
- new->inode = inode;
- INIT_LIST_HEAD(&new->list);
- spin_lock(&sbi->dir_inode_lock);
- ret = __add_dirty_inode(inode, new);
- spin_unlock(&sbi->dir_inode_lock);
-
- if (ret)
- kmem_cache_free(inode_entry_slab, new);
+ spin_lock(&sbi->inode_lock[DIR_INODE]);
+ __add_dirty_inode(inode, DIR_INODE);
+ spin_unlock(&sbi->inode_lock[DIR_INODE]);
}
-void remove_dirty_dir_inode(struct inode *inode)
+void remove_dirty_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct inode_entry *entry;
-
- if (!S_ISDIR(inode->i_mode))
- return;
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
- spin_lock(&sbi->dir_inode_lock);
- if (get_dirty_pages(inode) ||
- !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
- spin_unlock(&sbi->dir_inode_lock);
+ if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
+ !S_ISLNK(inode->i_mode))
return;
- }
- entry = F2FS_I(inode)->dirty_dir;
- list_del(&entry->list);
- F2FS_I(inode)->dirty_dir = NULL;
- clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
- stat_dec_dirty_dir(sbi);
- spin_unlock(&sbi->dir_inode_lock);
- kmem_cache_free(inode_entry_slab, entry);
+ spin_lock(&sbi->inode_lock[type]);
+ __remove_dirty_inode(inode, type);
+ spin_unlock(&sbi->inode_lock[type]);
/* Only from the recovery routine */
- if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
- clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
+ if (is_inode_flag_set(fi, FI_DELAY_IPUT)) {
+ clear_inode_flag(fi, FI_DELAY_IPUT);
iput(inode);
}
}
-void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
+int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
{
struct list_head *head;
- struct inode_entry *entry;
struct inode *inode;
+ struct f2fs_inode_info *fi;
+ bool is_dir = (type == DIR_INODE);
+
+ trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir,
+ get_pages(sbi, is_dir ?
+ F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
retry:
if (unlikely(f2fs_cp_error(sbi)))
- return;
+ return -EIO;
- spin_lock(&sbi->dir_inode_lock);
+ spin_lock(&sbi->inode_lock[type]);
- head = &sbi->dir_inode_list;
+ head = &sbi->inode_list[type];
if (list_empty(head)) {
- spin_unlock(&sbi->dir_inode_lock);
- return;
+ spin_unlock(&sbi->inode_lock[type]);
+ trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
+ get_pages(sbi, is_dir ?
+ F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
+ return 0;
}
- entry = list_entry(head->next, struct inode_entry, list);
- inode = igrab(entry->inode);
- spin_unlock(&sbi->dir_inode_lock);
+ fi = list_entry(head->next, struct f2fs_inode_info, dirty_list);
+ inode = igrab(&fi->vfs_inode);
+ spin_unlock(&sbi->inode_lock[type]);
if (inode) {
filemap_fdatawrite(inode->i_mapping);
iput(inode);
@@ -868,11 +859,9 @@ retry_flush_dents:
/* write all the dirty dentry pages */
if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
f2fs_unlock_all(sbi);
- sync_dirty_dir_inodes(sbi);
- if (unlikely(f2fs_cp_error(sbi))) {
- err = -EIO;
+ err = sync_dirty_inodes(sbi, DIR_INODE);
+ if (err)
goto out;
- }
goto retry_flush_dents;
}
@@ -885,10 +874,9 @@ retry_flush_nodes:
if (get_pages(sbi, F2FS_DIRTY_NODES)) {
up_write(&sbi->node_write);
- sync_node_pages(sbi, 0, &wbc);
- if (unlikely(f2fs_cp_error(sbi))) {
+ err = sync_node_pages(sbi, 0, &wbc);
+ if (err) {
f2fs_unlock_all(sbi);
- err = -EIO;
goto out;
}
goto retry_flush_nodes;
@@ -919,7 +907,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
finish_wait(&sbi->cp_wait, &wait);
}
-static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
@@ -945,7 +933,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
while (get_pages(sbi, F2FS_DIRTY_META)) {
sync_meta_pages(sbi, META, LONG_MAX);
if (unlikely(f2fs_cp_error(sbi)))
- return;
+ return -EIO;
}
next_free_nid(sbi, &last_nid);
@@ -1030,7 +1018,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* need to wait for end_io results */
wait_on_all_pages_writeback(sbi);
if (unlikely(f2fs_cp_error(sbi)))
- return;
+ return -EIO;
/* write out checkpoint buffer at block 0 */
update_meta_page(sbi, ckpt, start_blk++);
@@ -1058,7 +1046,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
wait_on_all_pages_writeback(sbi);
if (unlikely(f2fs_cp_error(sbi)))
- return;
+ return -EIO;
filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
@@ -1081,22 +1069,25 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
invalidate_mapping_pages(META_MAPPING(sbi), discard_blk,
discard_blk);
- release_dirty_inode(sbi);
+ release_ino_entry(sbi);
if (unlikely(f2fs_cp_error(sbi)))
- return;
+ return -EIO;
clear_prefree_segments(sbi, cpc);
clear_sbi_flag(sbi, SBI_IS_DIRTY);
+
+ return 0;
}
/*
* We guarantee that this checkpoint procedure will not fail.
*/
-void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
unsigned long long ckpt_ver;
+ int err = 0;
mutex_lock(&sbi->cp_mutex);
@@ -1104,14 +1095,19 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
(cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
(cpc->reason == CP_DISCARD && !sbi->discard_blks)))
goto out;
- if (unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi))) {
+ err = -EIO;
goto out;
- if (f2fs_readonly(sbi->sb))
+ }
+ if (f2fs_readonly(sbi->sb)) {
+ err = -EROFS;
goto out;
+ }
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
- if (block_operations(sbi))
+ err = block_operations(sbi);
+ if (err)
goto out;
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
@@ -1133,7 +1129,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
flush_sit_entries(sbi, cpc);
/* unlock all the fs_lock[] in do_checkpoint() */
- do_checkpoint(sbi, cpc);
+ err = do_checkpoint(sbi, cpc);
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
@@ -1143,10 +1139,11 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
"checkpoint: version = %llx", ckpt_ver);
/* do checkpoint periodically */
- sbi->cp_expires = round_jiffies_up(jiffies + HZ * sbi->cp_interval);
+ f2fs_update_time(sbi, CP_TIME);
+ trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
out:
mutex_unlock(&sbi->cp_mutex);
- trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
+ return err;
}
void init_ino_entry_info(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 972eab7ac..5c06db17e 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -225,7 +225,8 @@ void set_data_blkaddr(struct dnode_of_data *dn)
/* Get physical address of data block */
addr_array = blkaddr_in_node(rn);
addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
- set_page_dirty(node_page);
+ if (set_page_dirty(node_page))
+ dn->node_changed = true;
}
int reserve_new_block(struct dnode_of_data *dn)
@@ -412,7 +413,7 @@ struct page *get_new_data_page(struct inode *inode,
struct page *page;
struct dnode_of_data dn;
int err;
-repeat:
+
page = f2fs_grab_cache_page(mapping, index, true);
if (!page) {
/*
@@ -441,12 +442,11 @@ repeat:
} else {
f2fs_put_page(page, 1);
- page = get_read_data_page(inode, index, READ_SYNC, true);
+ /* if ipage exists, blkaddr should be NEW_ADDR */
+ f2fs_bug_on(F2FS_I_SB(inode), ipage);
+ page = get_lock_data_page(inode, index, true);
if (IS_ERR(page))
- goto repeat;
-
- /* wait for read completion */
- lock_page(page);
+ return page;
}
got_it:
if (new_i_size && i_size_read(inode) <
@@ -494,14 +494,10 @@ alloc:
if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT))
i_size_write(dn->inode,
((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT));
-
- /* direct IO doesn't use extent cache to maximize the performance */
- f2fs_drop_largest_extent(dn->inode, fofs);
-
return 0;
}
-static void __allocate_data_blocks(struct inode *inode, loff_t offset,
+static int __allocate_data_blocks(struct inode *inode, loff_t offset,
size_t count)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -510,14 +506,15 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
u64 len = F2FS_BYTES_TO_BLK(count);
bool allocated;
u64 end_offset;
+ int err = 0;
while (len) {
- f2fs_balance_fs(sbi);
f2fs_lock_op(sbi);
/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
- if (get_dnode_of_data(&dn, start, ALLOC_NODE))
+ err = get_dnode_of_data(&dn, start, ALLOC_NODE);
+ if (err)
goto out;
allocated = false;
@@ -526,12 +523,15 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
while (dn.ofs_in_node < end_offset && len) {
block_t blkaddr;
- if (unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi))) {
+ err = -EIO;
goto sync_out;
+ }
blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) {
- if (__allocate_data_block(&dn))
+ err = __allocate_data_block(&dn);
+ if (err)
goto sync_out;
allocated = true;
}
@@ -545,8 +545,10 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
+
+ f2fs_balance_fs(sbi, dn.node_changed);
}
- return;
+ return err;
sync_out:
if (allocated)
@@ -554,7 +556,8 @@ sync_out:
f2fs_put_dnode(&dn);
out:
f2fs_unlock_op(sbi);
- return;
+ f2fs_balance_fs(sbi, dn.node_changed);
+ return err;
}
/*
@@ -566,7 +569,7 @@ out:
* b. do not use extent cache for better performance
* c. give the block addresses to blockdev
*/
-static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
+int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
int create, int flag)
{
unsigned int maxblocks = map->m_len;
@@ -577,6 +580,7 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
int err = 0, ofs = 1;
struct extent_info ei;
bool allocated = false;
+ block_t blkaddr;
map->m_len = 0;
map->m_flags = 0;
@@ -592,7 +596,7 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
}
if (create)
- f2fs_lock_op(F2FS_I_SB(inode));
+ f2fs_lock_op(sbi);
/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -640,12 +644,21 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
pgofs++;
get_next:
+ if (map->m_len >= maxblocks)
+ goto sync_out;
+
if (dn.ofs_in_node >= end_offset) {
if (allocated)
sync_inode_page(&dn);
allocated = false;
f2fs_put_dnode(&dn);
+ if (create) {
+ f2fs_unlock_op(sbi);
+ f2fs_balance_fs(sbi, dn.node_changed);
+ f2fs_lock_op(sbi);
+ }
+
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, pgofs, mode);
if (err) {
@@ -657,52 +670,53 @@ get_next:
end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
}
- if (maxblocks > map->m_len) {
- block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+ blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
- if (create) {
- if (unlikely(f2fs_cp_error(sbi))) {
- err = -EIO;
- goto sync_out;
- }
- err = __allocate_data_block(&dn);
- if (err)
- goto sync_out;
- allocated = true;
- map->m_flags |= F2FS_MAP_NEW;
- blkaddr = dn.data_blkaddr;
- } else {
- /*
- * we only merge preallocated unwritten blocks
- * for fiemap.
- */
- if (flag != F2FS_GET_BLOCK_FIEMAP ||
- blkaddr != NEW_ADDR)
- goto sync_out;
+ if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
+ if (create) {
+ if (unlikely(f2fs_cp_error(sbi))) {
+ err = -EIO;
+ goto sync_out;
}
+ err = __allocate_data_block(&dn);
+ if (err)
+ goto sync_out;
+ allocated = true;
+ map->m_flags |= F2FS_MAP_NEW;
+ blkaddr = dn.data_blkaddr;
+ } else {
+ /*
+ * we only merge preallocated unwritten blocks
+ * for fiemap.
+ */
+ if (flag != F2FS_GET_BLOCK_FIEMAP ||
+ blkaddr != NEW_ADDR)
+ goto sync_out;
}
+ }
- /* Give more consecutive addresses for the readahead */
- if ((map->m_pblk != NEW_ADDR &&
- blkaddr == (map->m_pblk + ofs)) ||
- (map->m_pblk == NEW_ADDR &&
- blkaddr == NEW_ADDR)) {
- ofs++;
- dn.ofs_in_node++;
- pgofs++;
- map->m_len++;
- goto get_next;
- }
+ /* Give more consecutive addresses for the readahead */
+ if ((map->m_pblk != NEW_ADDR &&
+ blkaddr == (map->m_pblk + ofs)) ||
+ (map->m_pblk == NEW_ADDR &&
+ blkaddr == NEW_ADDR)) {
+ ofs++;
+ dn.ofs_in_node++;
+ pgofs++;
+ map->m_len++;
+ goto get_next;
}
+
sync_out:
if (allocated)
sync_inode_page(&dn);
put_out:
f2fs_put_dnode(&dn);
unlock_out:
- if (create)
- f2fs_unlock_op(F2FS_I_SB(inode));
+ if (create) {
+ f2fs_unlock_op(sbi);
+ f2fs_balance_fs(sbi, dn.node_changed);
+ }
out:
trace_f2fs_map_blocks(inode, map, err);
return err;
@@ -742,6 +756,10 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock,
static int get_data_block_bmap(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
+ /* The block number must be less than the F2FS max file blocks */
+ if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
+ return -EFBIG;
+
return __get_data_block(inode, iblock, bh_result, create,
F2FS_GET_BLOCK_BMAP);
}
@@ -761,10 +779,9 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
{
struct buffer_head map_bh;
sector_t start_blk, last_blk;
- loff_t isize = i_size_read(inode);
+ loff_t isize;
u64 logical = 0, phys = 0, size = 0;
u32 flags = 0;
- bool past_eof = false, whole_file = false;
int ret = 0;
ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
@@ -777,18 +794,21 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
return ret;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
- if (len >= isize) {
- whole_file = true;
- len = isize;
- }
+ isize = i_size_read(inode);
+ if (start >= isize)
+ goto out;
+
+ if (start + len > isize)
+ len = isize - start;
if (logical_to_blk(inode, len) == 0)
len = blk_to_logical(inode, 1);
start_blk = logical_to_blk(inode, start);
last_blk = logical_to_blk(inode, start + len - 1);
+
next:
memset(&map_bh, 0, sizeof(struct buffer_head));
map_bh.b_size = len;
@@ -800,59 +820,37 @@ next:
/* HOLE */
if (!buffer_mapped(&map_bh)) {
- start_blk++;
-
- if (!past_eof && blk_to_logical(inode, start_blk) >= isize)
- past_eof = 1;
-
- if (past_eof && size) {
- flags |= FIEMAP_EXTENT_LAST;
- ret = fiemap_fill_next_extent(fieinfo, logical,
- phys, size, flags);
- } else if (size) {
- ret = fiemap_fill_next_extent(fieinfo, logical,
- phys, size, flags);
- size = 0;
- }
+ /* Go through holes until we pass the EOF */
+ if (blk_to_logical(inode, start_blk++) < isize)
+ goto prep_next;
+ /* Finding a hole beyond isize means there are no more extents.
+ * Note that this relies on the premise that filesystems don't
+ * punch holes beyond isize while keeping the size unchanged.
+ */
+ flags |= FIEMAP_EXTENT_LAST;
+ }
- /* if we have holes up to/past EOF then we're done */
- if (start_blk > last_blk || past_eof || ret)
- goto out;
- } else {
- if (start_blk > last_blk && !whole_file) {
- ret = fiemap_fill_next_extent(fieinfo, logical,
- phys, size, flags);
- goto out;
- }
+ if (size) {
+ if (f2fs_encrypted_inode(inode))
+ flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
- /*
- * if size != 0 then we know we already have an extent
- * to add, so add it.
- */
- if (size) {
- ret = fiemap_fill_next_extent(fieinfo, logical,
- phys, size, flags);
- if (ret)
- goto out;
- }
+ ret = fiemap_fill_next_extent(fieinfo, logical,
+ phys, size, flags);
+ }
- logical = blk_to_logical(inode, start_blk);
- phys = blk_to_logical(inode, map_bh.b_blocknr);
- size = map_bh.b_size;
- flags = 0;
- if (buffer_unwritten(&map_bh))
- flags = FIEMAP_EXTENT_UNWRITTEN;
+ if (start_blk > last_blk || ret)
+ goto out;
- start_blk += logical_to_blk(inode, size);
+ logical = blk_to_logical(inode, start_blk);
+ phys = blk_to_logical(inode, map_bh.b_blocknr);
+ size = map_bh.b_size;
+ flags = 0;
+ if (buffer_unwritten(&map_bh))
+ flags = FIEMAP_EXTENT_UNWRITTEN;
- /*
- * If we are past the EOF, then we need to make sure as
- * soon as we find a hole that the last extent we found
- * is marked with FIEMAP_EXTENT_LAST
- */
- if (!past_eof && logical + size >= isize)
- past_eof = true;
- }
+ start_blk += logical_to_blk(inode, size);
+
+prep_next:
cond_resched();
if (fatal_signal_pending(current))
ret = -EINTR;
@@ -862,7 +860,7 @@ out:
if (ret == 1)
ret = 0;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
@@ -1083,6 +1081,7 @@ int do_write_data_page(struct f2fs_io_info *fio)
*/
if (unlikely(fio->blk_addr != NEW_ADDR &&
!is_cold_data(page) &&
+ !IS_ATOMIC_WRITTEN_PAGE(page) &&
need_inplace_update(inode))) {
rewrite_data_page(fio);
set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
@@ -1179,10 +1178,11 @@ out:
if (err)
ClearPageUptodate(page);
unlock_page(page);
- if (need_balance_fs)
- f2fs_balance_fs(sbi);
- if (wbc->for_reclaim)
+ f2fs_balance_fs(sbi, need_balance_fs);
+ if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) {
f2fs_submit_merged_bio(sbi, DATA, WRITE);
+ remove_dirty_inode(inode);
+ }
return 0;
redirty_out:
@@ -1354,6 +1354,10 @@ static int f2fs_write_data_pages(struct address_space *mapping,
available_free_memory(sbi, DIRTY_DENTS))
goto skip_write;
+ /* skip writing during file defragment */
+ if (is_inode_flag_set(F2FS_I(inode), FI_DO_DEFRAG))
+ goto skip_write;
+
/* during POR, we don't need to trigger writepage at all. */
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto skip_write;
@@ -1369,7 +1373,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
if (locked)
mutex_unlock(&sbi->writepages);
- remove_dirty_dir_inode(inode);
+ remove_dirty_inode(inode);
wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
return ret;
@@ -1382,13 +1386,85 @@ skip_write:
static void f2fs_write_failed(struct address_space *mapping, loff_t to)
{
struct inode *inode = mapping->host;
+ loff_t i_size = i_size_read(inode);
- if (to > inode->i_size) {
- truncate_pagecache(inode, inode->i_size);
- truncate_blocks(inode, inode->i_size, true);
+ if (to > i_size) {
+ truncate_pagecache(inode, i_size);
+ truncate_blocks(inode, i_size, true);
}
}
+static int prepare_write_begin(struct f2fs_sb_info *sbi,
+ struct page *page, loff_t pos, unsigned len,
+ block_t *blk_addr, bool *node_changed)
+{
+ struct inode *inode = page->mapping->host;
+ pgoff_t index = page->index;
+ struct dnode_of_data dn;
+ struct page *ipage;
+ bool locked = false;
+ struct extent_info ei;
+ int err = 0;
+
+ if (f2fs_has_inline_data(inode) ||
+ (pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
+ f2fs_lock_op(sbi);
+ locked = true;
+ }
+restart:
+ /* check inline_data */
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage)) {
+ err = PTR_ERR(ipage);
+ goto unlock_out;
+ }
+
+ set_new_dnode(&dn, inode, ipage, ipage, 0);
+
+ if (f2fs_has_inline_data(inode)) {
+ if (pos + len <= MAX_INLINE_DATA) {
+ read_inline_data(page, ipage);
+ set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+ sync_inode_page(&dn);
+ } else {
+ err = f2fs_convert_inline_page(&dn, page);
+ if (err)
+ goto out;
+ if (dn.data_blkaddr == NULL_ADDR)
+ err = f2fs_get_block(&dn, index);
+ }
+ } else if (locked) {
+ err = f2fs_get_block(&dn, index);
+ } else {
+ if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+ dn.data_blkaddr = ei.blk + index - ei.fofs;
+ } else {
+ bool restart = false;
+
+ /* hole case */
+ err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
+ if (err || (!err && dn.data_blkaddr == NULL_ADDR))
+ restart = true;
+ if (restart) {
+ f2fs_put_dnode(&dn);
+ f2fs_lock_op(sbi);
+ locked = true;
+ goto restart;
+ }
+ }
+ }
+
+ /* convert_inline_page can set node_changed */
+ *blk_addr = dn.data_blkaddr;
+ *node_changed = dn.node_changed;
+out:
+ f2fs_put_dnode(&dn);
+unlock_out:
+ if (locked)
+ f2fs_unlock_op(sbi);
+ return err;
+}
+
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
@@ -1396,15 +1472,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
struct inode *inode = mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct page *page = NULL;
- struct page *ipage;
pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
- struct dnode_of_data dn;
+ bool need_balance = false;
+ block_t blkaddr = NULL_ADDR;
int err = 0;
trace_f2fs_write_begin(inode, pos, len, flags);
- f2fs_balance_fs(sbi);
-
/*
* We should check this at this moment to avoid deadlock on inode page
* and #0 page. The locking rule for inline_data conversion should be:
@@ -1424,41 +1498,27 @@ repeat:
*pagep = page;
- f2fs_lock_op(sbi);
-
- /* check inline_data */
- ipage = get_node_page(sbi, inode->i_ino);
- if (IS_ERR(ipage)) {
- err = PTR_ERR(ipage);
- goto unlock_fail;
- }
-
- set_new_dnode(&dn, inode, ipage, ipage, 0);
+ err = prepare_write_begin(sbi, page, pos, len,
+ &blkaddr, &need_balance);
+ if (err)
+ goto fail;
- if (f2fs_has_inline_data(inode)) {
- if (pos + len <= MAX_INLINE_DATA) {
- read_inline_data(page, ipage);
- set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
- sync_inode_page(&dn);
- goto put_next;
+ if (need_balance && has_not_enough_free_secs(sbi, 0)) {
+ unlock_page(page);
+ f2fs_balance_fs(sbi, true);
+ lock_page(page);
+ if (page->mapping != mapping) {
+ /* The page got truncated from under us */
+ f2fs_put_page(page, 1);
+ goto repeat;
}
- err = f2fs_convert_inline_page(&dn, page);
- if (err)
- goto put_fail;
}
- err = f2fs_get_block(&dn, index);
- if (err)
- goto put_fail;
-put_next:
- f2fs_put_dnode(&dn);
- f2fs_unlock_op(sbi);
-
f2fs_wait_on_page_writeback(page, DATA);
/* wait for GCed encrypted page writeback */
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
- f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr);
+ f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr);
if (len == PAGE_CACHE_SIZE)
goto out_update;
@@ -1474,14 +1534,14 @@ put_next:
goto out_update;
}
- if (dn.data_blkaddr == NEW_ADDR) {
+ if (blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
} else {
struct f2fs_io_info fio = {
.sbi = sbi,
.type = DATA,
.rw = READ_SYNC,
- .blk_addr = dn.data_blkaddr,
+ .blk_addr = blkaddr,
.page = page,
.encrypted_page = NULL,
};
@@ -1512,10 +1572,6 @@ out_clear:
clear_cold_data(page);
return 0;
-put_fail:
- f2fs_put_dnode(&dn);
-unlock_fail:
- f2fs_unlock_op(sbi);
fail:
f2fs_put_page(page, 1);
f2fs_write_failed(mapping, pos + len);
@@ -1540,6 +1596,7 @@ static int f2fs_write_end(struct file *file,
}
f2fs_put_page(page, 1);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return copied;
}
@@ -1567,11 +1624,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
int err;
/* we don't need to use inline_data strictly */
- if (f2fs_has_inline_data(inode)) {
- err = f2fs_convert_inline_inode(inode);
- if (err)
- return err;
- }
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
return 0;
@@ -1583,11 +1638,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
if (iov_iter_rw(iter) == WRITE) {
- __allocate_data_blocks(inode, offset, count);
- if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
- err = -EIO;
+ err = __allocate_data_blocks(inode, offset, count);
+ if (err)
goto out;
- }
}
err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 478e5d541..4fb6ef88a 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -38,12 +38,15 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->hit_rbtree = atomic64_read(&sbi->read_hit_rbtree);
si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree;
si->total_ext = atomic64_read(&sbi->total_hit_ext);
- si->ext_tree = sbi->total_ext_tree;
+ si->ext_tree = atomic_read(&sbi->total_ext_tree);
+ si->zombie_tree = atomic_read(&sbi->total_zombie_tree);
si->ext_node = atomic_read(&sbi->total_ext_node);
si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
- si->ndirty_dirs = sbi->n_dirty_dirs;
si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
+ si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA);
+ si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
+ si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
si->wb_pages = get_pages(sbi, F2FS_WRITEBACK);
si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
@@ -105,7 +108,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
bimodal = 0;
total_vblocks = 0;
- blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg);
+ blks_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg;
hblks_per_sec = blks_per_sec / 2;
for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
@@ -189,10 +192,10 @@ get_cache:
si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
sizeof(struct nat_entry_set);
si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
- si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry);
for (i = 0; i <= UPDATE_INO; i++)
si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
- si->cache_mem += sbi->total_ext_tree * sizeof(struct extent_tree);
+ si->cache_mem += atomic_read(&sbi->total_ext_tree) *
+ sizeof(struct extent_tree);
si->cache_mem += atomic_read(&sbi->total_ext_node) *
sizeof(struct extent_node);
@@ -211,12 +214,10 @@ static int stat_show(struct seq_file *s, void *v)
mutex_lock(&f2fs_stat_mutex);
list_for_each_entry(si, &f2fs_stat_list, stat_list) {
- char devname[BDEVNAME_SIZE];
-
update_general_status(si->sbi);
- seq_printf(s, "\n=====[ partition info(%s). #%d ]=====\n",
- bdevname(si->sbi->sb->s_bdev, devname), i++);
+ seq_printf(s, "\n=====[ partition info(%pg). #%d ]=====\n",
+ si->sbi->sb->s_bdev, i++);
seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
si->sit_area_segs, si->nat_area_segs);
seq_printf(s, "[SSA: %d] [MAIN: %d",
@@ -269,7 +270,8 @@ static int stat_show(struct seq_file *s, void *v)
si->dirty_count);
seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n",
si->prefree_count, si->free_segs, si->free_secs);
- seq_printf(s, "CP calls: %d\n", si->cp_count);
+ seq_printf(s, "CP calls: %d (BG: %d)\n",
+ si->cp_count, si->bg_cp_count);
seq_printf(s, "GC calls: %d (BG: %d)\n",
si->call_count, si->bg_gc);
seq_printf(s, " - data segments : %d (%d)\n",
@@ -290,8 +292,8 @@ static int stat_show(struct seq_file *s, void *v)
!si->total_ext ? 0 :
div64_u64(si->hit_total * 100, si->total_ext),
si->hit_total, si->total_ext);
- seq_printf(s, " - Inner Struct Count: tree: %d, node: %d\n",
- si->ext_tree, si->ext_node);
+ seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
+ si->ext_tree, si->zombie_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
seq_printf(s, " - inmem: %4d, wb: %4d\n",
si->inmem_pages, si->wb_pages);
@@ -299,6 +301,8 @@ static int stat_show(struct seq_file *s, void *v)
si->ndirty_node, si->node_pages);
seq_printf(s, " - dents: %4d in dirs:%4d\n",
si->ndirty_dent, si->ndirty_dirs);
+ seq_printf(s, " - datas: %4d in files:%4d\n",
+ si->ndirty_data, si->ndirty_files);
seq_printf(s, " - meta: %4d in %4d\n",
si->ndirty_meta, si->meta_pages);
seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n",
@@ -406,20 +410,23 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
kfree(si);
}
-void __init f2fs_create_root_stats(void)
+int __init f2fs_create_root_stats(void)
{
struct dentry *file;
f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
if (!f2fs_debugfs_root)
- return;
+ return -ENOMEM;
file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
NULL, &stat_fops);
if (!file) {
debugfs_remove(f2fs_debugfs_root);
f2fs_debugfs_root = NULL;
+ return -ENOMEM;
}
+
+ return 0;
}
void f2fs_destroy_root_stats(void)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 7c1678ba8..faa7495e2 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -172,8 +172,6 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
namehash = f2fs_dentry_hash(&name);
- f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH);
-
nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
@@ -238,6 +236,14 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
goto out;
max_depth = F2FS_I(dir)->i_current_depth;
+ if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) {
+ f2fs_msg(F2FS_I_SB(dir)->sb, KERN_WARNING,
+ "Corrupted max_depth of %lu: %u",
+ dir->i_ino, max_depth);
+ max_depth = MAX_DIR_HASH_DEPTH;
+ F2FS_I(dir)->i_current_depth = max_depth;
+ mark_inode_dirty(dir);
+ }
for (level = 0; level < max_depth; level++) {
de = find_in_level(dir, level, &fname, res_page);
@@ -444,7 +450,7 @@ error:
/* once the failed inode becomes a bad inode, i_mode is S_IFREG */
truncate_inode_pages(&inode->i_data, 0);
truncate_blocks(inode, 0, false);
- remove_dirty_dir_inode(inode);
+ remove_dirty_inode(inode);
remove_inode_page(inode);
return ERR_PTR(err);
}
@@ -630,6 +636,7 @@ fail:
f2fs_put_page(dentry_page, 1);
out:
f2fs_fname_free_filename(&fname);
+ f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
return err;
}
@@ -651,6 +658,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
fail:
up_write(&F2FS_I(inode)->i_sem);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return err;
}
@@ -695,6 +703,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
int i;
+ f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
+
if (f2fs_has_inline_dentry(dir))
return f2fs_delete_inline_entry(dentry, page, dir, inode);
@@ -855,25 +865,27 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
for (; n < npages; n++) {
dentry_page = get_lock_data_page(inode, n, false);
- if (IS_ERR(dentry_page))
- continue;
+ if (IS_ERR(dentry_page)) {
+ err = PTR_ERR(dentry_page);
+ if (err == -ENOENT)
+ continue;
+ else
+ goto out;
+ }
dentry_blk = kmap(dentry_page);
make_dentry_ptr(inode, &d, (void *)dentry_blk, 1);
- if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr))
- goto stop;
+ if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr)) {
+ kunmap(dentry_page);
+ f2fs_put_page(dentry_page, 1);
+ break;
+ }
ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK;
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
- dentry_page = NULL;
- }
-stop:
- if (dentry_page && !IS_ERR(dentry_page)) {
- kunmap(dentry_page);
- f2fs_put_page(dentry_page, 1);
}
out:
f2fs_fname_crypto_free_buffer(&fstr);
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 7ddba812e..ccd5c636d 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -36,7 +36,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
rb_link_node(&en->rb_node, parent, p);
rb_insert_color(&en->rb_node, &et->root);
- et->count++;
+ atomic_inc(&et->node_cnt);
atomic_inc(&sbi->total_ext_node);
return en;
}
@@ -45,7 +45,7 @@ static void __detach_extent_node(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_node *en)
{
rb_erase(&en->rb_node, &et->root);
- et->count--;
+ atomic_dec(&et->node_cnt);
atomic_dec(&sbi->total_ext_node);
if (et->cached_en == en)
@@ -68,11 +68,13 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
et->root = RB_ROOT;
et->cached_en = NULL;
rwlock_init(&et->lock);
- atomic_set(&et->refcount, 0);
- et->count = 0;
- sbi->total_ext_tree++;
+ INIT_LIST_HEAD(&et->list);
+ atomic_set(&et->node_cnt, 0);
+ atomic_inc(&sbi->total_ext_tree);
+ } else {
+ atomic_dec(&sbi->total_zombie_tree);
+ list_del_init(&et->list);
}
- atomic_inc(&et->refcount);
up_write(&sbi->extent_tree_lock);
/* never died until evict_inode */
@@ -131,7 +133,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
{
struct rb_node *node, *next;
struct extent_node *en;
- unsigned int count = et->count;
+ unsigned int count = atomic_read(&et->node_cnt);
node = rb_first(&et->root);
while (node) {
@@ -152,7 +154,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
node = next;
}
- return count - et->count;
+ return count - atomic_read(&et->node_cnt);
}
static void __drop_largest_extent(struct inode *inode,
@@ -164,34 +166,33 @@ static void __drop_largest_extent(struct inode *inode,
largest->len = 0;
}
-void f2fs_drop_largest_extent(struct inode *inode, pgoff_t fofs)
-{
- if (!f2fs_may_extent_tree(inode))
- return;
-
- __drop_largest_extent(inode, fofs, 1);
-}
-
-void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
+/* return true, if inode page is changed */
+bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et;
struct extent_node *en;
struct extent_info ei;
- if (!f2fs_may_extent_tree(inode))
- return;
+ if (!f2fs_may_extent_tree(inode)) {
+ /* drop largest extent */
+ if (i_ext && i_ext->len) {
+ i_ext->len = 0;
+ return true;
+ }
+ return false;
+ }
et = __grab_extent_tree(inode);
- if (!i_ext || le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN)
- return;
+ if (!i_ext || !i_ext->len)
+ return false;
set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
write_lock(&et->lock);
- if (et->count)
+ if (atomic_read(&et->node_cnt))
goto out;
en = __init_extent_tree(sbi, et, &ei);
@@ -202,6 +203,7 @@ void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
}
out:
write_unlock(&et->lock);
+ return false;
}
static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
@@ -549,45 +551,44 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
+ struct extent_tree *et, *next;
struct extent_node *en, *tmp;
unsigned long ino = F2FS_ROOT_INO(sbi);
- struct radix_tree_root *root = &sbi->extent_tree_root;
unsigned int found;
unsigned int node_cnt = 0, tree_cnt = 0;
int remained;
+ bool do_free = false;
if (!test_opt(sbi, EXTENT_CACHE))
return 0;
+ if (!atomic_read(&sbi->total_zombie_tree))
+ goto free_node;
+
if (!down_write_trylock(&sbi->extent_tree_lock))
goto out;
/* 1. remove unreferenced extent tree */
- while ((found = radix_tree_gang_lookup(root,
- (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
- unsigned i;
-
- ino = treevec[found - 1]->ino + 1;
- for (i = 0; i < found; i++) {
- struct extent_tree *et = treevec[i];
-
- if (!atomic_read(&et->refcount)) {
- write_lock(&et->lock);
- node_cnt += __free_extent_tree(sbi, et, true);
- write_unlock(&et->lock);
+ list_for_each_entry_safe(et, next, &sbi->zombie_list, list) {
+ if (atomic_read(&et->node_cnt)) {
+ write_lock(&et->lock);
+ node_cnt += __free_extent_tree(sbi, et, true);
+ write_unlock(&et->lock);
+ }
- radix_tree_delete(root, et->ino);
- kmem_cache_free(extent_tree_slab, et);
- sbi->total_ext_tree--;
- tree_cnt++;
+ list_del_init(&et->list);
+ radix_tree_delete(&sbi->extent_tree_root, et->ino);
+ kmem_cache_free(extent_tree_slab, et);
+ atomic_dec(&sbi->total_ext_tree);
+ atomic_dec(&sbi->total_zombie_tree);
+ tree_cnt++;
- if (node_cnt + tree_cnt >= nr_shrink)
- goto unlock_out;
- }
- }
+ if (node_cnt + tree_cnt >= nr_shrink)
+ goto unlock_out;
}
up_write(&sbi->extent_tree_lock);
+free_node:
/* 2. remove LRU extent entries */
if (!down_write_trylock(&sbi->extent_tree_lock))
goto out;
@@ -599,15 +600,19 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
if (!remained--)
break;
list_del_init(&en->list);
+ do_free = true;
}
spin_unlock(&sbi->extent_lock);
+ if (do_free == false)
+ goto unlock_out;
+
/*
* reset ino for searching victims from beginning of global extent tree.
*/
ino = F2FS_ROOT_INO(sbi);
- while ((found = radix_tree_gang_lookup(root,
+ while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
unsigned i;
@@ -615,9 +620,13 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
for (i = 0; i < found; i++) {
struct extent_tree *et = treevec[i];
- write_lock(&et->lock);
- node_cnt += __free_extent_tree(sbi, et, false);
- write_unlock(&et->lock);
+ if (!atomic_read(&et->node_cnt))
+ continue;
+
+ if (write_trylock(&et->lock)) {
+ node_cnt += __free_extent_tree(sbi, et, false);
+ write_unlock(&et->lock);
+ }
if (node_cnt + tree_cnt >= nr_shrink)
goto unlock_out;
@@ -637,7 +646,7 @@ unsigned int f2fs_destroy_extent_node(struct inode *inode)
struct extent_tree *et = F2FS_I(inode)->extent_tree;
unsigned int node_cnt = 0;
- if (!et)
+ if (!et || !atomic_read(&et->node_cnt))
return 0;
write_lock(&et->lock);
@@ -656,8 +665,12 @@ void f2fs_destroy_extent_tree(struct inode *inode)
if (!et)
return;
- if (inode->i_nlink && !is_bad_inode(inode) && et->count) {
- atomic_dec(&et->refcount);
+ if (inode->i_nlink && !is_bad_inode(inode) &&
+ atomic_read(&et->node_cnt)) {
+ down_write(&sbi->extent_tree_lock);
+ list_add_tail(&et->list, &sbi->zombie_list);
+ atomic_inc(&sbi->total_zombie_tree);
+ up_write(&sbi->extent_tree_lock);
return;
}
@@ -666,11 +679,10 @@ void f2fs_destroy_extent_tree(struct inode *inode)
/* delete extent tree entry in radix tree */
down_write(&sbi->extent_tree_lock);
- atomic_dec(&et->refcount);
- f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
+ f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
kmem_cache_free(extent_tree_slab, et);
- sbi->total_ext_tree--;
+ atomic_dec(&sbi->total_ext_tree);
up_write(&sbi->extent_tree_lock);
F2FS_I(inode)->extent_tree = NULL;
@@ -722,7 +734,9 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi)
init_rwsem(&sbi->extent_tree_lock);
INIT_LIST_HEAD(&sbi->extent_list);
spin_lock_init(&sbi->extent_lock);
- sbi->total_ext_tree = 0;
+ atomic_set(&sbi->total_ext_tree, 0);
+ INIT_LIST_HEAD(&sbi->zombie_list);
+ atomic_set(&sbi->total_zombie_tree, 0);
atomic_set(&sbi->total_ext_node, 0);
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 9db5500d6..ff79054c6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -21,6 +21,7 @@
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <linux/bio.h>
+#include <linux/blkdev.h>
#ifdef CONFIG_F2FS_CHECK_FS
#define f2fs_bug_on(sbi, condition) BUG_ON(condition)
@@ -54,6 +55,7 @@
#define F2FS_MOUNT_FASTBOOT 0x00001000
#define F2FS_MOUNT_EXTENT_CACHE 0x00002000
#define F2FS_MOUNT_FORCE_FG_GC 0x00004000
+#define F2FS_MOUNT_DATA_FLUSH 0x00008000
#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -125,6 +127,7 @@ enum {
#define BATCHED_TRIM_BLOCKS(sbi) \
(BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
#define DEF_CP_INTERVAL 60 /* 60 secs */
+#define DEF_IDLE_INTERVAL 120 /* 2 mins */
struct cp_control {
int reason;
@@ -158,13 +161,7 @@ struct ino_entry {
nid_t ino; /* inode number */
};
-/*
- * for the list of directory inodes or gc inodes.
- * NOTE: there are two slab users for this structure, if we add/modify/delete
- * fields in structure for one of slab users, it may affect fields or size of
- * other one, in this condition, it's better to split both of slab and related
- * data structure.
- */
+/* for the list of inodes to be GCed */
struct inode_entry {
struct list_head list; /* list head */
struct inode *inode; /* vfs inode pointer */
@@ -234,6 +231,7 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
#define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6)
#define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7)
+#define F2FS_IOC_DEFRAGMENT _IO(F2FS_IOCTL_MAGIC, 8)
#define F2FS_IOC_SET_ENCRYPTION_POLICY \
_IOR('f', 19, struct f2fs_encryption_policy)
@@ -256,10 +254,16 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
/*
* ioctl commands in 32 bit emulation
*/
-#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS
-#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS
+#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS
+#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS
+#define F2FS_IOC32_GETVERSION FS_IOC32_GETVERSION
#endif
+struct f2fs_defragment {
+ u64 start;
+ u64 len;
+};
+
/*
* For INODE and NODE manager
*/
@@ -357,9 +361,9 @@ struct extent_tree {
struct rb_root root; /* root of extent info rb-tree */
struct extent_node *cached_en; /* recently accessed extent node */
struct extent_info largest; /* largested extent info */
+ struct list_head list; /* to be used by sbi->zombie_list */
rwlock_t lock; /* protect extent info rb-tree */
- atomic_t refcount; /* reference count of rb-tree */
- unsigned int count; /* # of extent node in rb-tree*/
+ atomic_t node_cnt; /* # of extent node in rb-tree*/
};
/*
@@ -434,8 +438,8 @@ struct f2fs_inode_info {
unsigned int clevel; /* maximum level of given file name */
nid_t i_xattr_nid; /* node id that contains xattrs */
unsigned long long xattr_ver; /* cp version of xattr modification */
- struct inode_entry *dirty_dir; /* the pointer of dirty dir */
+ struct list_head dirty_list; /* linked in global dirty list */
struct list_head inmem_pages; /* inmemory pages managed by f2fs */
struct mutex inmem_lock; /* lock for inmemory pages */
@@ -544,6 +548,7 @@ struct dnode_of_data {
nid_t nid; /* node id of the direct node block */
unsigned int ofs_in_node; /* data offset in the node page */
bool inode_page_locked; /* inode page is locked or not */
+ bool node_changed; /* is node block changed */
block_t data_blkaddr; /* block address of the node block */
};
@@ -647,6 +652,7 @@ struct f2fs_sm_info {
enum count_type {
F2FS_WRITEBACK,
F2FS_DIRTY_DENTS,
+ F2FS_DIRTY_DATA,
F2FS_DIRTY_NODES,
F2FS_DIRTY_META,
F2FS_INMEM_PAGES,
@@ -695,6 +701,12 @@ struct f2fs_bio_info {
struct rw_semaphore io_rwsem; /* blocking op for bio */
};
+enum inode_type {
+ DIR_INODE, /* for dirty dir inode */
+ FILE_INODE, /* for dirty regular/symlink inode */
+ NR_INODE_TYPE,
+};
+
/* for inner inode cache management */
struct inode_management {
struct radix_tree_root ino_root; /* ino entry array */
@@ -711,11 +723,17 @@ enum {
SBI_POR_DOING, /* recovery is doing or not */
};
+enum {
+ CP_TIME,
+ REQ_TIME,
+ MAX_TIME,
+};
+
struct f2fs_sb_info {
struct super_block *sb; /* pointer to VFS super block */
struct proc_dir_entry *s_proc; /* proc entry */
- struct buffer_head *raw_super_buf; /* buffer head of raw sb */
struct f2fs_super_block *raw_super; /* raw super block pointer */
+ int valid_super_block; /* valid super block no */
int s_flag; /* flags for sbi */
/* for node-related operations */
@@ -737,23 +755,26 @@ struct f2fs_sb_info {
struct rw_semaphore node_write; /* locking node writes */
struct mutex writepages; /* mutex for writepages() */
wait_queue_head_t cp_wait;
- long cp_expires, cp_interval; /* next expected periodic cp */
+ unsigned long last_time[MAX_TIME]; /* to store time in jiffies */
+ long interval_time[MAX_TIME]; /* to store thresholds */
struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
/* for orphan inode, use 0'th array */
unsigned int max_orphans; /* max orphan inodes */
- /* for directory inode management */
- struct list_head dir_inode_list; /* dir inode list */
- spinlock_t dir_inode_lock; /* for dir inode list lock */
+ /* for inode management */
+ struct list_head inode_list[NR_INODE_TYPE]; /* dirty inode list */
+ spinlock_t inode_lock[NR_INODE_TYPE]; /* for dirty inode list lock */
/* for extent tree cache */
struct radix_tree_root extent_tree_root;/* cache extent cache entries */
struct rw_semaphore extent_tree_lock; /* locking extent radix tree */
struct list_head extent_list; /* lru list for shrinker */
spinlock_t extent_lock; /* locking extent lru list */
- int total_ext_tree; /* extent tree count */
+ atomic_t total_ext_tree; /* extent tree count */
+ struct list_head zombie_list; /* extent zombie tree list */
+ atomic_t total_zombie_tree; /* extent zombie tree count */
atomic_t total_ext_node; /* extent info count */
/* basic filesystem units */
@@ -771,6 +792,7 @@ struct f2fs_sb_info {
unsigned int total_node_count; /* total node block count */
unsigned int total_valid_node_count; /* valid node block count */
unsigned int total_valid_inode_count; /* valid inode count */
+ loff_t max_file_blocks; /* max block index of file */
int active_logs; /* # of active logs */
int dir_level; /* directory level */
@@ -809,7 +831,7 @@ struct f2fs_sb_info {
atomic_t inline_inode; /* # of inline_data inodes */
atomic_t inline_dir; /* # of inline_dentry inodes */
int bg_gc; /* background gc calls */
- unsigned int n_dirty_dirs; /* # of dir inodes */
+ unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */
#endif
unsigned int last_victim[2]; /* last victim segment # */
spinlock_t stat_lock; /* lock for stat operations */
@@ -824,6 +846,31 @@ struct f2fs_sb_info {
unsigned int shrinker_run_no;
};
+static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
+{
+ sbi->last_time[type] = jiffies;
+}
+
+static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
+{
+ struct timespec ts = {sbi->interval_time[type], 0};
+ unsigned long interval = timespec_to_jiffies(&ts);
+
+ return time_after(jiffies, sbi->last_time[type] + interval);
+}
+
+static inline bool is_idle(struct f2fs_sb_info *sbi)
+{
+ struct block_device *bdev = sbi->sb->s_bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
+ struct request_list *rl = &q->root_rl;
+
+ if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC])
+ return 0;
+
+ return f2fs_time_over(sbi, REQ_TIME);
+}
+
/*
* Inline functions
*/
@@ -1059,8 +1106,8 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
static inline void inode_inc_dirty_pages(struct inode *inode)
{
atomic_inc(&F2FS_I(inode)->dirty_pages);
- if (S_ISDIR(inode->i_mode))
- inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS);
+ inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
+ F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
}
static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
@@ -1075,9 +1122,8 @@ static inline void inode_dec_dirty_pages(struct inode *inode)
return;
atomic_dec(&F2FS_I(inode)->dirty_pages);
-
- if (S_ISDIR(inode->i_mode))
- dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS);
+ dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
+ F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
}
static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
@@ -1092,8 +1138,7 @@ static inline int get_dirty_pages(struct inode *inode)
static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
{
- unsigned int pages_per_sec = sbi->segs_per_sec *
- (1 << sbi->log_blocks_per_seg);
+ unsigned int pages_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg;
return ((get_pages(sbi, block_type) + pages_per_sec - 1)
>> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
}
@@ -1416,6 +1461,8 @@ enum {
FI_DROP_CACHE, /* drop dirty page cache */
FI_DATA_EXIST, /* indicate data exists */
FI_INLINE_DOTS, /* indicate inline dot dentries */
+ FI_DO_DEFRAG, /* indicate defragment is running */
+ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
};
static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
@@ -1602,13 +1649,11 @@ static inline bool is_dot_dotdot(const struct qstr *str)
static inline bool f2fs_may_extent_tree(struct inode *inode)
{
- mode_t mode = inode->i_mode;
-
if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE) ||
is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
return false;
- return S_ISREG(mode);
+ return S_ISREG(inode->i_mode);
}
static inline void *f2fs_kvmalloc(size_t size, gfp_t flags)
@@ -1661,8 +1706,8 @@ long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long);
void f2fs_set_inode_flags(struct inode *);
struct inode *f2fs_iget(struct super_block *, unsigned long);
int try_to_free_nats(struct f2fs_sb_info *, int);
-void update_inode(struct inode *, struct page *);
-void update_inode_page(struct inode *);
+int update_inode(struct inode *, struct page *);
+int update_inode_page(struct inode *);
int f2fs_write_inode(struct inode *, struct writeback_control *);
void f2fs_evict_inode(struct inode *);
void handle_failed_inode(struct inode *);
@@ -1767,7 +1812,7 @@ void destroy_node_manager_caches(void);
*/
void register_inmem_page(struct inode *, struct page *);
int commit_inmem_pages(struct inode *, bool);
-void f2fs_balance_fs(struct f2fs_sb_info *);
+void f2fs_balance_fs(struct f2fs_sb_info *, bool);
void f2fs_balance_fs_bg(struct f2fs_sb_info *);
int f2fs_issue_flush(struct f2fs_sb_info *);
int create_flush_cmd_control(struct f2fs_sb_info *);
@@ -1813,9 +1858,9 @@ bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int);
int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool);
void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t);
long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
-void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
-void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
-void release_dirty_inode(struct f2fs_sb_info *);
+void add_ino_entry(struct f2fs_sb_info *, nid_t, int type);
+void remove_ino_entry(struct f2fs_sb_info *, nid_t, int type);
+void release_ino_entry(struct f2fs_sb_info *);
bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
int acquire_orphan_inode(struct f2fs_sb_info *);
void release_orphan_inode(struct f2fs_sb_info *);
@@ -1825,9 +1870,9 @@ int recover_orphan_inodes(struct f2fs_sb_info *);
int get_valid_checkpoint(struct f2fs_sb_info *);
void update_dirty_page(struct inode *, struct page *);
void add_dirty_dir_inode(struct inode *);
-void remove_dirty_dir_inode(struct inode *);
-void sync_dirty_dir_inodes(struct f2fs_sb_info *);
-void write_checkpoint(struct f2fs_sb_info *, struct cp_control *);
+void remove_dirty_inode(struct inode *);
+int sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type);
+int write_checkpoint(struct f2fs_sb_info *, struct cp_control *);
void init_ino_entry_info(struct f2fs_sb_info *);
int __init create_checkpoint_caches(void);
void destroy_checkpoint_caches(void);
@@ -1847,6 +1892,7 @@ struct page *find_data_page(struct inode *, pgoff_t);
struct page *get_lock_data_page(struct inode *, pgoff_t, bool);
struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
int do_write_data_page(struct f2fs_io_info *);
+int f2fs_map_blocks(struct inode *, struct f2fs_map_blocks *, int, int);
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
void f2fs_invalidate_page(struct page *, unsigned int, unsigned int);
int f2fs_release_page(struct page *, gfp_t);
@@ -1877,8 +1923,9 @@ struct f2fs_stat_info {
int main_area_segs, main_area_sections, main_area_zones;
unsigned long long hit_largest, hit_cached, hit_rbtree;
unsigned long long hit_total, total_ext;
- int ext_tree, ext_node;
- int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
+ int ext_tree, zombie_tree, ext_node;
+ int ndirty_node, ndirty_meta;
+ int ndirty_dent, ndirty_dirs, ndirty_data, ndirty_files;
int nats, dirty_nats, sits, dirty_sits, fnids;
int total_count, utilization;
int bg_gc, inmem_pages, wb_pages;
@@ -1888,7 +1935,7 @@ struct f2fs_stat_info {
int util_free, util_valid, util_invalid;
int rsvd_segs, overp_segs;
int dirty_count, node_pages, meta_pages;
- int prefree_count, call_count, cp_count;
+ int prefree_count, call_count, cp_count, bg_cp_count;
int tot_segs, node_segs, data_segs, free_segs, free_secs;
int bg_node_segs, bg_data_segs;
int tot_blks, data_blks, node_blks;
@@ -1909,10 +1956,11 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
}
#define stat_inc_cp_count(si) ((si)->cp_count++)
+#define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++)
#define stat_inc_call_count(si) ((si)->call_count++)
#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
-#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++)
-#define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--)
+#define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++)
+#define stat_dec_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]--)
#define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext))
#define stat_inc_rbtree_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_rbtree))
#define stat_inc_largest_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_largest))
@@ -1987,14 +2035,15 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
int f2fs_build_stats(struct f2fs_sb_info *);
void f2fs_destroy_stats(struct f2fs_sb_info *);
-void __init f2fs_create_root_stats(void);
+int __init f2fs_create_root_stats(void);
void f2fs_destroy_root_stats(void);
#else
#define stat_inc_cp_count(si)
+#define stat_inc_bg_cp_count(si)
#define stat_inc_call_count(si)
#define stat_inc_bggc_count(si)
-#define stat_inc_dirty_dir(sbi)
-#define stat_dec_dirty_dir(sbi)
+#define stat_inc_dirty_inode(sbi, type)
+#define stat_dec_dirty_inode(sbi, type)
#define stat_inc_total_hit(sb)
#define stat_inc_rbtree_node_hit(sb)
#define stat_inc_largest_node_hit(sbi)
@@ -2015,7 +2064,7 @@ void f2fs_destroy_root_stats(void);
static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
-static inline void __init f2fs_create_root_stats(void) { }
+static inline int __init f2fs_create_root_stats(void) { return 0; }
static inline void f2fs_destroy_root_stats(void) { }
#endif
@@ -2069,8 +2118,7 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *);
* extent_cache.c
*/
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int);
-void f2fs_drop_largest_extent(struct inode *, pgoff_t);
-void f2fs_init_extent_tree(struct inode *, struct f2fs_extent *);
+bool f2fs_init_extent_tree(struct inode *, struct f2fs_extent *);
unsigned int f2fs_destroy_extent_node(struct inode *);
void f2fs_destroy_extent_tree(struct inode *);
bool f2fs_lookup_extent_cache(struct inode *, pgoff_t, struct extent_info *);
@@ -2121,7 +2169,7 @@ static inline int f2fs_sb_has_crypto(struct super_block *sb)
static inline bool f2fs_may_encrypt(struct inode *inode)
{
#ifdef CONFIG_F2FS_FS_ENCRYPTION
- mode_t mode = inode->i_mode;
+ umode_t mode = inode->i_mode;
return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode));
#else
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index a197215ad..ea272be62 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -40,8 +40,6 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
struct dnode_of_data dn;
int err;
- f2fs_balance_fs(sbi);
-
sb_start_pagefault(inode->i_sb);
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
@@ -57,6 +55,8 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
+ f2fs_balance_fs(sbi, dn.node_changed);
+
file_update_time(vma->vm_file);
lock_page(page);
if (unlikely(page->mapping != inode->i_mapping ||
@@ -96,6 +96,7 @@ mapped:
clear_cold_data(page);
out:
sb_end_pagefault(inode->i_sb);
+ f2fs_update_time(sbi, REQ_TIME);
return block_page_mkwrite_return(err);
}
@@ -201,7 +202,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
trace_f2fs_sync_file_enter(inode);
/* if fdatasync is triggered, let's do in-place-update */
- if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
+ if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
set_inode_flag(fi, FI_NEED_IPU);
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
clear_inode_flag(fi, FI_NEED_IPU);
@@ -233,9 +234,6 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
goto out;
}
go_write:
- /* guarantee free sections for fsync */
- f2fs_balance_fs(sbi);
-
/*
* Both of fdatasync() and fsync() are able to be recovered from
* sudden-power-off.
@@ -261,8 +259,10 @@ sync_nodes:
sync_node_pages(sbi, ino, &wbc);
/* if cp_error was enabled, we should avoid infinite loop */
- if (unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi))) {
+ ret = -EIO;
goto out;
+ }
if (need_inode_block_update(sbi, ino)) {
mark_inode_dirty_sync(inode);
@@ -275,12 +275,13 @@ sync_nodes:
goto out;
/* once recovery info is written, don't need to track this */
- remove_dirty_inode(sbi, ino, APPEND_INO);
+ remove_ino_entry(sbi, ino, APPEND_INO);
clear_inode_flag(fi, FI_APPEND_WRITE);
flush_out:
- remove_dirty_inode(sbi, ino, UPDATE_INO);
+ remove_ino_entry(sbi, ino, UPDATE_INO);
clear_inode_flag(fi, FI_UPDATE_WRITE);
ret = f2fs_issue_flush(sbi);
+ f2fs_update_time(sbi, REQ_TIME);
out:
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
f2fs_trace_ios(NULL, 1);
@@ -332,7 +333,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
loff_t isize;
int err = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
isize = i_size_read(inode);
if (offset >= isize)
@@ -387,10 +388,10 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
found:
if (whence == SEEK_HOLE && data_ofs > isize)
data_ofs = isize;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return vfs_setpos(file, data_ofs, maxbytes);
fail:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return -ENXIO;
}
@@ -418,19 +419,18 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file_inode(file);
+ int err;
if (f2fs_encrypted_inode(inode)) {
- int err = f2fs_get_encryption_info(inode);
+ err = f2fs_get_encryption_info(inode);
if (err)
return 0;
}
/* we don't need to use inline_data strictly */
- if (f2fs_has_inline_data(inode)) {
- int err = f2fs_convert_inline_inode(inode);
- if (err)
- return err;
- }
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
file_accessed(file);
vma->vm_ops = &f2fs_file_vm_ops;
@@ -483,11 +483,11 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
F2FS_I(dn->inode)) + ofs;
f2fs_update_extent_cache_range(dn, fofs, 0, len);
dec_valid_block_count(sbi, dn->inode, nr_free);
- set_page_dirty(dn->node_page);
sync_inode_page(dn);
}
dn->ofs_in_node = ofs;
+ f2fs_update_time(sbi, REQ_TIME);
trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
dn->ofs_in_node, nr_free);
return nr_free;
@@ -604,7 +604,7 @@ int f2fs_truncate(struct inode *inode, bool lock)
trace_f2fs_truncate(inode);
/* we should check inline_data size */
- if (f2fs_has_inline_data(inode) && !f2fs_may_inline_data(inode)) {
+ if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
if (err)
return err;
@@ -679,13 +679,20 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
err = f2fs_truncate(inode, true);
if (err)
return err;
- f2fs_balance_fs(F2FS_I_SB(inode));
+ f2fs_balance_fs(F2FS_I_SB(inode), true);
} else {
/*
* do not trim all blocks after i_size if target size is
* larger than i_size.
*/
truncate_setsize(inode, attr->ia_size);
+
+ /* should convert inline inode here */
+ if (!f2fs_may_inline_data(inode)) {
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
+ }
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
}
}
@@ -727,7 +734,7 @@ static int fill_zero(struct inode *inode, pgoff_t index,
if (!len)
return 0;
- f2fs_balance_fs(sbi);
+ f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
page = get_new_data_page(inode, NULL, index, false);
@@ -778,13 +785,11 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
pgoff_t pg_start, pg_end;
loff_t off_start, off_end;
- int ret = 0;
+ int ret;
- if (f2fs_has_inline_data(inode)) {
- ret = f2fs_convert_inline_inode(inode);
- if (ret)
- return ret;
- }
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
@@ -815,7 +820,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
loff_t blk_start, blk_end;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- f2fs_balance_fs(sbi);
+ f2fs_balance_fs(sbi, true);
blk_start = (loff_t)pg_start << PAGE_CACHE_SHIFT;
blk_end = (loff_t)pg_end << PAGE_CACHE_SHIFT;
@@ -918,7 +923,7 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
int ret = 0;
for (; end < nrpages; start++, end++) {
- f2fs_balance_fs(sbi);
+ f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
ret = __exchange_data_block(inode, end, start, true);
f2fs_unlock_op(sbi);
@@ -941,13 +946,9 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
return -EINVAL;
- f2fs_balance_fs(F2FS_I_SB(inode));
-
- if (f2fs_has_inline_data(inode)) {
- ret = f2fs_convert_inline_inode(inode);
- if (ret)
- return ret;
- }
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
pg_start = offset >> PAGE_CACHE_SHIFT;
pg_end = (offset + len) >> PAGE_CACHE_SHIFT;
@@ -991,13 +992,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret)
return ret;
- f2fs_balance_fs(sbi);
-
- if (f2fs_has_inline_data(inode)) {
- ret = f2fs_convert_inline_inode(inode);
- if (ret)
- return ret;
- }
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
if (ret)
@@ -1104,13 +1101,11 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
return -EINVAL;
- f2fs_balance_fs(sbi);
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
- if (f2fs_has_inline_data(inode)) {
- ret = f2fs_convert_inline_inode(inode);
- if (ret)
- return ret;
- }
+ f2fs_balance_fs(sbi, true);
ret = truncate_blocks(inode, i_size_read(inode), true);
if (ret)
@@ -1154,17 +1149,15 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
loff_t off_start, off_end;
int ret = 0;
- f2fs_balance_fs(sbi);
-
ret = inode_newsize_ok(inode, (len + offset));
if (ret)
return ret;
- if (f2fs_has_inline_data(inode)) {
- ret = f2fs_convert_inline_inode(inode);
- if (ret)
- return ret;
- }
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
+
+ f2fs_balance_fs(sbi, true);
pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
@@ -1226,7 +1219,7 @@ static long f2fs_fallocate(struct file *file, int mode,
FALLOC_FL_INSERT_RANGE))
return -EOPNOTSUPP;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (mode & FALLOC_FL_PUNCH_HOLE) {
if (offset >= inode->i_size)
@@ -1246,10 +1239,11 @@ static long f2fs_fallocate(struct file *file, int mode,
if (!ret) {
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
}
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
trace_f2fs_fallocate(inode, mode, offset, len, ret);
return ret;
@@ -1313,13 +1307,13 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
flags = f2fs_mask_flags(inode->i_mode, flags);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
oldflags = fi->i_flags;
if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
if (!capable(CAP_LINUX_IMMUTABLE)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
ret = -EPERM;
goto out;
}
@@ -1328,7 +1322,7 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
flags = flags & FS_FL_USER_MODIFIABLE;
flags |= oldflags & ~FS_FL_USER_MODIFIABLE;
fi->i_flags = flags;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
f2fs_set_inode_flags(inode);
inode->i_ctime = CURRENT_TIME;
@@ -1353,8 +1347,6 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
if (!inode_owner_or_capable(inode))
return -EACCES;
- f2fs_balance_fs(F2FS_I_SB(inode));
-
if (f2fs_is_atomic_file(inode))
return 0;
@@ -1363,6 +1355,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
return ret;
set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+
return 0;
}
@@ -1384,8 +1378,10 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
if (f2fs_is_atomic_file(inode)) {
clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
ret = commit_inmem_pages(inode, false);
- if (ret)
+ if (ret) {
+ set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
goto err_out;
+ }
}
ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0);
@@ -1410,6 +1406,7 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
return ret;
set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return 0;
}
@@ -1441,13 +1438,17 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
if (ret)
return ret;
- f2fs_balance_fs(F2FS_I_SB(inode));
-
- clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
- clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
- commit_inmem_pages(inode, true);
+ if (f2fs_is_atomic_file(inode)) {
+ clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
+ commit_inmem_pages(inode, true);
+ }
+ if (f2fs_is_volatile_file(inode)) {
+ clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
+ ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0);
+ }
mnt_drop_write_file(filp);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return ret;
}
@@ -1487,6 +1488,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
default:
return -EINVAL;
}
+ f2fs_update_time(sbi, REQ_TIME);
return 0;
}
@@ -1517,6 +1519,7 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
if (copy_to_user((struct fstrim_range __user *)arg, &range,
sizeof(range)))
return -EFAULT;
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return 0;
}
@@ -1540,6 +1543,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
sizeof(policy)))
return -EFAULT;
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return f2fs_process_policy(&policy, inode);
#else
return -EOPNOTSUPP;
@@ -1586,13 +1590,13 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
generate_random_uuid(sbi->raw_super->encrypt_pw_salt);
err = f2fs_commit_super(sbi, false);
-
- mnt_drop_write_file(filp);
if (err) {
/* undo new data */
memset(sbi->raw_super->encrypt_pw_salt, 0, 16);
+ mnt_drop_write_file(filp);
return err;
}
+ mnt_drop_write_file(filp);
got_it:
if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt,
16))
@@ -1629,7 +1633,6 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct cp_control cpc;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -1637,13 +1640,196 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
if (f2fs_readonly(sbi->sb))
return -EROFS;
- cpc.reason = __get_cp_reason(sbi);
+ return f2fs_sync_fs(sbi->sb, 1);
+}
- mutex_lock(&sbi->gc_mutex);
- write_checkpoint(sbi, &cpc);
- mutex_unlock(&sbi->gc_mutex);
+static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
+ struct file *filp,
+ struct f2fs_defragment *range)
+{
+ struct inode *inode = file_inode(filp);
+ struct f2fs_map_blocks map;
+ struct extent_info ei;
+ pgoff_t pg_start, pg_end;
+ unsigned int blk_per_seg = sbi->blocks_per_seg;
+ unsigned int total = 0, sec_num;
+ unsigned int pages_per_sec = sbi->segs_per_sec * blk_per_seg;
+ block_t blk_end = 0;
+ bool fragmented = false;
+ int err;
- return 0;
+ /* if in-place-update policy is enabled, don't waste time here */
+ if (need_inplace_update(inode))
+ return -EINVAL;
+
+ pg_start = range->start >> PAGE_CACHE_SHIFT;
+ pg_end = (range->start + range->len) >> PAGE_CACHE_SHIFT;
+
+ f2fs_balance_fs(sbi, true);
+
+ inode_lock(inode);
+
+ /* writeback all dirty pages in the range */
+ err = filemap_write_and_wait_range(inode->i_mapping, range->start,
+ range->start + range->len - 1);
+ if (err)
+ goto out;
+
+ /*
+ * lookup mapping info in extent cache, skip defragmenting if physical
+ * block addresses are continuous.
+ */
+ if (f2fs_lookup_extent_cache(inode, pg_start, &ei)) {
+ if (ei.fofs + ei.len >= pg_end)
+ goto out;
+ }
+
+ map.m_lblk = pg_start;
+
+ /*
+ * lookup mapping info in dnode page cache, skip defragmenting if all
+ * physical block addresses are continuous even if there are hole(s)
+ * in logical blocks.
+ */
+ while (map.m_lblk < pg_end) {
+ map.m_len = pg_end - map.m_lblk;
+ err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ);
+ if (err)
+ goto out;
+
+ if (!(map.m_flags & F2FS_MAP_FLAGS)) {
+ map.m_lblk++;
+ continue;
+ }
+
+ if (blk_end && blk_end != map.m_pblk) {
+ fragmented = true;
+ break;
+ }
+ blk_end = map.m_pblk + map.m_len;
+
+ map.m_lblk += map.m_len;
+ }
+
+ if (!fragmented)
+ goto out;
+
+ map.m_lblk = pg_start;
+ map.m_len = pg_end - pg_start;
+
+ sec_num = (map.m_len + pages_per_sec - 1) / pages_per_sec;
+
+ /*
+ * make sure there are enough free sections for LFS allocation; this
+ * avoids defragmenting in SSR mode when free sections are being
+ * allocated intensively
+ */
+ if (has_not_enough_free_secs(sbi, sec_num)) {
+ err = -EAGAIN;
+ goto out;
+ }
+
+ while (map.m_lblk < pg_end) {
+ pgoff_t idx;
+ int cnt = 0;
+
+do_map:
+ map.m_len = pg_end - map.m_lblk;
+ err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ);
+ if (err)
+ goto clear_out;
+
+ if (!(map.m_flags & F2FS_MAP_FLAGS)) {
+ map.m_lblk++;
+ continue;
+ }
+
+ set_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);
+
+ idx = map.m_lblk;
+ while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) {
+ struct page *page;
+
+ page = get_lock_data_page(inode, idx, true);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto clear_out;
+ }
+
+ set_page_dirty(page);
+ f2fs_put_page(page, 1);
+
+ idx++;
+ cnt++;
+ total++;
+ }
+
+ map.m_lblk = idx;
+
+ if (idx < pg_end && cnt < blk_per_seg)
+ goto do_map;
+
+ clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);
+
+ err = filemap_fdatawrite(inode->i_mapping);
+ if (err)
+ goto out;
+ }
+clear_out:
+ clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);
+out:
+ inode_unlock(inode);
+ if (!err)
+ range->len = (u64)total << PAGE_CACHE_SHIFT;
+ return err;
+}
+
+static int f2fs_ioc_defragment(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_defragment range;
+ int err;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (!S_ISREG(inode->i_mode))
+ return -EINVAL;
+
+ err = mnt_want_write_file(filp);
+ if (err)
+ return err;
+
+ if (f2fs_readonly(sbi->sb)) {
+ err = -EROFS;
+ goto out;
+ }
+
+ if (copy_from_user(&range, (struct f2fs_defragment __user *)arg,
+ sizeof(range))) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ /* verify alignment of offset & size */
+ if (range.start & (F2FS_BLKSIZE - 1) ||
+ range.len & (F2FS_BLKSIZE - 1)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = f2fs_defragment_range(sbi, filp, &range);
+ f2fs_update_time(sbi, REQ_TIME);
+ if (err < 0)
+ goto out;
+
+ if (copy_to_user((struct f2fs_defragment __user *)arg, &range,
+ sizeof(range)))
+ err = -EFAULT;
+out:
+ mnt_drop_write_file(filp);
+ return err;
}
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
@@ -1679,6 +1865,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_ioc_gc(filp, arg);
case F2FS_IOC_WRITE_CHECKPOINT:
return f2fs_ioc_write_checkpoint(filp, arg);
+ case F2FS_IOC_DEFRAGMENT:
+ return f2fs_ioc_defragment(filp, arg);
default:
return -ENOTTY;
}
@@ -1706,6 +1894,22 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case F2FS_IOC32_SETFLAGS:
cmd = F2FS_IOC_SETFLAGS;
break;
+ case F2FS_IOC32_GETVERSION:
+ cmd = F2FS_IOC_GETVERSION;
+ break;
+ case F2FS_IOC_START_ATOMIC_WRITE:
+ case F2FS_IOC_COMMIT_ATOMIC_WRITE:
+ case F2FS_IOC_START_VOLATILE_WRITE:
+ case F2FS_IOC_RELEASE_VOLATILE_WRITE:
+ case F2FS_IOC_ABORT_VOLATILE_WRITE:
+ case F2FS_IOC_SHUTDOWN:
+ case F2FS_IOC_SET_ENCRYPTION_POLICY:
+ case F2FS_IOC_GET_ENCRYPTION_PWSALT:
+ case F2FS_IOC_GET_ENCRYPTION_POLICY:
+ case F2FS_IOC_GARBAGE_COLLECT:
+ case F2FS_IOC_WRITE_CHECKPOINT:
+ case F2FS_IOC_DEFRAGMENT:
+ break;
default:
return -ENOIOCTLCMD;
}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index fedbf67a0..f610c2a9b 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -16,7 +16,6 @@
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/freezer.h>
-#include <linux/blkdev.h>
#include "f2fs.h"
#include "node.h"
@@ -173,9 +172,9 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
{
/* SSR allocates in a segment unit */
if (p->alloc_mode == SSR)
- return 1 << sbi->log_blocks_per_seg;
+ return sbi->blocks_per_seg;
if (p->gc_mode == GC_GREEDY)
- return (1 << sbi->log_blocks_per_seg) * p->ofs_unit;
+ return sbi->blocks_per_seg * p->ofs_unit;
else if (p->gc_mode == GC_CB)
return UINT_MAX;
else /* No other gc_mode */
@@ -832,8 +831,10 @@ gc_more:
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
goto stop;
- if (unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi))) {
+ ret = -EIO;
goto stop;
+ }
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed)) {
gc_type = FG_GC;
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index b4a65be9f..a993967dc 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -100,11 +100,3 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
return true;
return false;
}
-
-static inline int is_idle(struct f2fs_sb_info *sbi)
-{
- struct block_device *bdev = sbi->sb->s_bdev;
- struct request_queue *q = bdev_get_queue(bdev);
- struct request_list *rl = &q->root_rl;
- return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]);
-}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index bda712646..c3f0b7d4c 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -16,9 +16,6 @@
bool f2fs_may_inline_data(struct inode *inode)
{
- if (!test_opt(F2FS_I_SB(inode), INLINE_DATA))
- return false;
-
if (f2fs_is_atomic_file(inode))
return false;
@@ -177,6 +174,9 @@ int f2fs_convert_inline_inode(struct inode *inode)
struct page *ipage, *page;
int err = 0;
+ if (!f2fs_has_inline_data(inode))
+ return 0;
+
page = grab_cache_page(inode->i_mapping, 0);
if (!page)
return -ENOMEM;
@@ -199,6 +199,9 @@ out:
f2fs_unlock_op(sbi);
f2fs_put_page(page, 1);
+
+ f2fs_balance_fs(sbi, dn.node_changed);
+
return err;
}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 97e20deca..2adeff26b 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -138,7 +138,8 @@ static int do_read_inode(struct inode *inode)
fi->i_pino = le32_to_cpu(ri->i_pino);
fi->i_dir_level = ri->i_dir_level;
- f2fs_init_extent_tree(inode, &ri->i_ext);
+ if (f2fs_init_extent_tree(inode, &ri->i_ext))
+ set_page_dirty(node_page);
get_inline_info(fi, ri);
@@ -202,6 +203,7 @@ make_now:
inode->i_op = &f2fs_encrypted_symlink_inode_operations;
else
inode->i_op = &f2fs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &f2fs_dblock_aops;
} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
@@ -221,7 +223,7 @@ bad_inode:
return ERR_PTR(ret);
}
-void update_inode(struct inode *inode, struct page *node_page)
+int update_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_inode *ri;
@@ -259,15 +261,16 @@ void update_inode(struct inode *inode, struct page *node_page)
__set_inode_rdev(inode, ri);
set_cold_node(inode, node_page);
- set_page_dirty(node_page);
-
clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
+
+ return set_page_dirty(node_page);
}
-void update_inode_page(struct inode *inode)
+int update_inode_page(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct page *node_page;
+ int ret = 0;
retry:
node_page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(node_page)) {
@@ -278,10 +281,11 @@ retry:
} else if (err != -ENOENT) {
f2fs_stop_checkpoint(sbi);
}
- return;
+ return 0;
}
- update_inode(inode, node_page);
+ ret = update_inode(inode, node_page);
f2fs_put_page(node_page, 1);
+ return ret;
}
int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
@@ -299,9 +303,8 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
* We need to balance fs here to prevent from producing dirty node pages
* during the urgent cleaning time when runing out of free sections.
*/
- update_inode_page(inode);
-
- f2fs_balance_fs(sbi);
+ if (update_inode_page(inode))
+ f2fs_balance_fs(sbi, true);
return 0;
}
@@ -327,7 +330,7 @@ void f2fs_evict_inode(struct inode *inode)
goto out_clear;
f2fs_bug_on(sbi, get_dirty_pages(inode));
- remove_dirty_dir_inode(inode);
+ remove_dirty_inode(inode);
f2fs_destroy_extent_tree(inode);
@@ -357,9 +360,9 @@ no_delete:
if (xnid)
invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
if (is_inode_flag_set(fi, FI_APPEND_WRITE))
- add_dirty_inode(sbi, inode->i_ino, APPEND_INO);
+ add_ino_entry(sbi, inode->i_ino, APPEND_INO);
if (is_inode_flag_set(fi, FI_UPDATE_WRITE))
- add_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
+ add_ino_entry(sbi, inode->i_ino, UPDATE_INO);
if (is_inode_flag_set(fi, FI_FREE_NID)) {
if (err && err != -ENOENT)
alloc_nid_done(sbi, inode->i_ino);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 2c32110f9..6f944e5eb 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -60,7 +60,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode))
f2fs_set_encrypted_inode(inode);
- if (f2fs_may_inline_data(inode))
+ if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode))
set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
if (f2fs_may_inline_dentry(inode))
set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY);
@@ -128,8 +128,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
nid_t ino = 0;
int err;
- f2fs_balance_fs(sbi);
-
inode = f2fs_new_inode(dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -142,6 +140,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
inode->i_mapping->a_ops = &f2fs_dblock_aops;
ino = inode->i_ino;
+ f2fs_balance_fs(sbi, true);
+
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
@@ -172,7 +172,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
!f2fs_is_child_context_consistent_with_parent(dir, inode))
return -EPERM;
- f2fs_balance_fs(sbi);
+ f2fs_balance_fs(sbi, true);
inode->i_ctime = CURRENT_TIME;
ihold(inode);
@@ -214,6 +214,15 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
struct page *page;
int err = 0;
+ if (f2fs_readonly(sbi->sb)) {
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "skip recovering inline_dots inode (ino:%lu, pino:%u) "
+ "in readonly mountpoint", dir->i_ino, pino);
+ return 0;
+ }
+
+ f2fs_balance_fs(sbi, true);
+
f2fs_lock_op(sbi);
de = f2fs_find_entry(dir, &dot, &page);
@@ -288,12 +297,13 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
int err = -ENOENT;
trace_f2fs_unlink_enter(dir, dentry);
- f2fs_balance_fs(sbi);
de = f2fs_find_entry(dir, &dentry->d_name, &page);
if (!de)
goto fail;
+ f2fs_balance_fs(sbi, true);
+
f2fs_lock_op(sbi);
err = acquire_orphan_inode(sbi);
if (err) {
@@ -315,12 +325,15 @@ fail:
return err;
}
-static const char *f2fs_follow_link(struct dentry *dentry, void **cookie)
+static const char *f2fs_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- const char *link = page_follow_link_light(dentry, cookie);
+ const char *link = page_get_link(dentry, inode, done);
if (!IS_ERR(link) && !*link) {
/* this is broken symlink case */
- page_put_link(NULL, *cookie);
+ do_delayed_call(done);
+ clear_delayed_call(done);
link = ERR_PTR(-ENOENT);
}
return link;
@@ -341,8 +354,6 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
if (len > dir->i_sb->s_blocksize)
return -ENAMETOOLONG;
- f2fs_balance_fs(sbi);
-
inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -351,8 +362,11 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
inode->i_op = &f2fs_encrypted_symlink_inode_operations;
else
inode->i_op = &f2fs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &f2fs_dblock_aops;
+ f2fs_balance_fs(sbi, true);
+
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
@@ -433,8 +447,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct inode *inode;
int err;
- f2fs_balance_fs(sbi);
-
inode = f2fs_new_inode(dir, S_IFDIR | mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -444,6 +456,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
inode->i_mapping->a_ops = &f2fs_dblock_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO);
+ f2fs_balance_fs(sbi, true);
+
set_inode_flag(F2FS_I(inode), FI_INC_LINK);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
@@ -481,8 +495,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
struct inode *inode;
int err = 0;
- f2fs_balance_fs(sbi);
-
inode = f2fs_new_inode(dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -490,6 +502,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
init_special_inode(inode, inode->i_mode, rdev);
inode->i_op = &f2fs_special_inode_operations;
+ f2fs_balance_fs(sbi, true);
+
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
@@ -516,9 +530,6 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
struct inode *inode;
int err;
- if (!whiteout)
- f2fs_balance_fs(sbi);
-
inode = f2fs_new_inode(dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -532,6 +543,8 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
inode->i_mapping->a_ops = &f2fs_dblock_aops;
}
+ f2fs_balance_fs(sbi, true);
+
f2fs_lock_op(sbi);
err = acquire_orphan_inode(sbi);
if (err)
@@ -604,8 +617,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out;
}
- f2fs_balance_fs(sbi);
-
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry)
goto out;
@@ -635,6 +646,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (!new_entry)
goto out_whiteout;
+ f2fs_balance_fs(sbi, true);
+
f2fs_lock_op(sbi);
err = acquire_orphan_inode(sbi);
@@ -666,6 +679,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
update_inode_page(old_inode);
update_inode_page(new_inode);
} else {
+ f2fs_balance_fs(sbi, true);
+
f2fs_lock_op(sbi);
err = f2fs_add_link(new_dentry, old_inode);
@@ -763,8 +778,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
new_inode)))
return -EPERM;
- f2fs_balance_fs(sbi);
-
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry)
goto out;
@@ -807,6 +820,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out_new_dir;
}
+ f2fs_balance_fs(sbi, true);
+
f2fs_lock_op(sbi);
err = update_dent_inode(old_inode, new_inode, &new_dentry->d_name);
@@ -923,18 +938,22 @@ static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry,
}
#ifdef CONFIG_F2FS_FS_ENCRYPTION
-static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cookie)
+static const char *f2fs_encrypted_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
struct page *cpage = NULL;
char *caddr, *paddr = NULL;
- struct f2fs_str cstr;
+ struct f2fs_str cstr = FSTR_INIT(NULL, 0);
struct f2fs_str pstr = FSTR_INIT(NULL, 0);
- struct inode *inode = d_inode(dentry);
struct f2fs_encrypted_symlink_data *sd;
loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
u32 max_size = inode->i_sb->s_blocksize;
int res;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
res = f2fs_get_encryption_info(inode);
if (res)
return ERR_PTR(res);
@@ -942,12 +961,18 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cook
cpage = read_mapping_page(inode->i_mapping, 0, NULL);
if (IS_ERR(cpage))
return ERR_CAST(cpage);
- caddr = kmap(cpage);
+ caddr = page_address(cpage);
caddr[size] = 0;
/* Symlink is encrypted */
sd = (struct f2fs_encrypted_symlink_data *)caddr;
cstr.len = le16_to_cpu(sd->len);
+
+ /* this is broken symlink case */
+ if (unlikely(cstr.len == 0)) {
+ res = -ENOENT;
+ goto errout;
+ }
cstr.name = kmalloc(cstr.len, GFP_NOFS);
if (!cstr.name) {
res = -ENOMEM;
@@ -956,7 +981,7 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cook
memcpy(cstr.name, sd->encrypted_path, cstr.len);
/* this is broken symlink case */
- if (cstr.name[0] == 0 && cstr.len == 0) {
+ if (unlikely(cstr.name[0] == 0)) {
res = -ENOENT;
goto errout;
}
@@ -982,27 +1007,27 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cook
/* Null-terminate the name */
paddr[res] = '\0';
- kunmap(cpage);
page_cache_release(cpage);
- return *cookie = paddr;
+ set_delayed_call(done, kfree_link, paddr);
+ return paddr;
errout:
kfree(cstr.name);
f2fs_fname_crypto_free_buffer(&pstr);
- kunmap(cpage);
page_cache_release(cpage);
return ERR_PTR(res);
}
const struct inode_operations f2fs_encrypted_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = f2fs_encrypted_follow_link,
- .put_link = kfree_put_link,
+ .get_link = f2fs_encrypted_get_link,
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
+#ifdef CONFIG_F2FS_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = f2fs_listxattr,
.removexattr = generic_removexattr,
+#endif
};
#endif
@@ -1031,8 +1056,7 @@ const struct inode_operations f2fs_dir_inode_operations = {
const struct inode_operations f2fs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = f2fs_follow_link,
- .put_link = page_put_link,
+ .get_link = f2fs_get_link,
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
#ifdef CONFIG_F2FS_FS_XATTR
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 7bcbc6e9c..342597a58 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -65,13 +65,14 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
} else if (type == EXTENT_CACHE) {
- mem_size = (sbi->total_ext_tree * sizeof(struct extent_tree) +
+ mem_size = (atomic_read(&sbi->total_ext_tree) *
+ sizeof(struct extent_tree) +
atomic_read(&sbi->total_ext_node) *
sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
} else {
- if (sbi->sb->s_bdi->wb.dirty_exceeded)
- return false;
+ if (!sbi->sb->s_bdi->wb.dirty_exceeded)
+ return true;
}
return res;
}
@@ -261,13 +262,11 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
{
struct nat_entry *e;
- down_write(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (!e) {
e = grab_nat_entry(nm_i, nid);
node_info_from_raw_nat(&e->ni, ne);
}
- up_write(&nm_i->nat_tree_lock);
}
static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
@@ -379,6 +378,8 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
memset(&ne, 0, sizeof(struct f2fs_nat_entry));
+ down_write(&nm_i->nat_tree_lock);
+
/* Check current segment summary */
mutex_lock(&curseg->curseg_mutex);
i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
@@ -399,6 +400,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
cache:
/* cache nat entry */
cache_nat_entry(NM_I(sbi), nid, &ne);
+ up_write(&nm_i->nat_tree_lock);
}
/*
@@ -676,7 +678,8 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
ret = truncate_dnode(&rdn);
if (ret < 0)
goto out_err;
- set_nid(page, i, 0, false);
+ if (set_nid(page, i, 0, false))
+ dn->node_changed = true;
}
} else {
child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
@@ -689,7 +692,8 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
rdn.nid = child_nid;
ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
if (ret == (NIDS_PER_BLOCK + 1)) {
- set_nid(page, i, 0, false);
+ if (set_nid(page, i, 0, false))
+ dn->node_changed = true;
child_nofs += ret;
} else if (ret < 0 && ret != -ENOENT) {
goto out_err;
@@ -750,7 +754,8 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
err = truncate_dnode(dn);
if (err < 0)
goto fail;
- set_nid(pages[idx], i, 0, false);
+ if (set_nid(pages[idx], i, 0, false))
+ dn->node_changed = true;
}
if (offset[idx + 1] == 0) {
@@ -975,7 +980,8 @@ struct page *new_node_page(struct dnode_of_data *dn,
fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
set_cold_node(dn->inode, page);
SetPageUptodate(page);
- set_page_dirty(page);
+ if (set_page_dirty(page))
+ dn->node_changed = true;
if (f2fs_has_xattr_block(ofs))
F2FS_I(dn->inode)->i_xattr_nid = dn->nid;
@@ -1035,6 +1041,10 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
struct page *apage;
int err;
+ if (!nid)
+ return;
+ f2fs_bug_on(sbi, check_nid_range(sbi, nid));
+
apage = find_get_page(NODE_MAPPING(sbi), nid);
if (apage && PageUptodate(apage)) {
f2fs_put_page(apage, 0);
@@ -1050,51 +1060,38 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
f2fs_put_page(apage, err ? 1 : 0);
}
-struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
+/*
+ * Read ahead up to MAX_RA_NODE node pages.
+ */
+void ra_node_pages(struct page *parent, int start)
{
- struct page *page;
- int err;
-repeat:
- page = grab_cache_page(NODE_MAPPING(sbi), nid);
- if (!page)
- return ERR_PTR(-ENOMEM);
+ struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
+ struct blk_plug plug;
+ int i, end;
+ nid_t nid;
- err = read_node_page(page, READ_SYNC);
- if (err < 0) {
- f2fs_put_page(page, 1);
- return ERR_PTR(err);
- } else if (err != LOCKED_PAGE) {
- lock_page(page);
- }
+ blk_start_plug(&plug);
- if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
- ClearPageUptodate(page);
- f2fs_put_page(page, 1);
- return ERR_PTR(-EIO);
- }
- if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
- f2fs_put_page(page, 1);
- goto repeat;
+ /* Then, try readahead for siblings of the desired node */
+ end = start + MAX_RA_NODE;
+ end = min(end, NIDS_PER_BLOCK);
+ for (i = start; i < end; i++) {
+ nid = get_nid(parent, i, false);
+ ra_node_page(sbi, nid);
}
- return page;
+
+ blk_finish_plug(&plug);
}
-/*
- * Return a locked page for the desired node page.
- * And, readahead MAX_RA_NODE number of node pages.
- */
-struct page *get_node_page_ra(struct page *parent, int start)
+struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
+ struct page *parent, int start)
{
- struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
- struct blk_plug plug;
struct page *page;
- int err, i, end;
- nid_t nid;
+ int err;
- /* First, try getting the desired direct node. */
- nid = get_nid(parent, start, false);
if (!nid)
return ERR_PTR(-ENOENT);
+ f2fs_bug_on(sbi, check_nid_range(sbi, nid));
repeat:
page = grab_cache_page(NODE_MAPPING(sbi), nid);
if (!page)
@@ -1108,46 +1105,53 @@ repeat:
goto page_hit;
}
- blk_start_plug(&plug);
-
- /* Then, try readahead for siblings of the desired node */
- end = start + MAX_RA_NODE;
- end = min(end, NIDS_PER_BLOCK);
- for (i = start + 1; i < end; i++) {
- nid = get_nid(parent, i, false);
- if (!nid)
- continue;
- ra_node_page(sbi, nid);
- }
-
- blk_finish_plug(&plug);
+ if (parent)
+ ra_node_pages(parent, start + 1);
lock_page(page);
+
+ if (unlikely(!PageUptodate(page))) {
+ f2fs_put_page(page, 1);
+ return ERR_PTR(-EIO);
+ }
if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
f2fs_put_page(page, 1);
goto repeat;
}
page_hit:
- if (unlikely(!PageUptodate(page))) {
- f2fs_put_page(page, 1);
- return ERR_PTR(-EIO);
- }
+ f2fs_bug_on(sbi, nid != nid_of_node(page));
return page;
}
+struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
+{
+ return __get_node_page(sbi, nid, NULL, 0);
+}
+
+struct page *get_node_page_ra(struct page *parent, int start)
+{
+ struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
+ nid_t nid = get_nid(parent, start, false);
+
+ return __get_node_page(sbi, nid, parent, start);
+}
+
void sync_inode_page(struct dnode_of_data *dn)
{
+ int ret = 0;
+
if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) {
- update_inode(dn->inode, dn->node_page);
+ ret = update_inode(dn->inode, dn->node_page);
} else if (dn->inode_page) {
if (!dn->inode_page_locked)
lock_page(dn->inode_page);
- update_inode(dn->inode, dn->inode_page);
+ ret = update_inode(dn->inode, dn->inode_page);
if (!dn->inode_page_locked)
unlock_page(dn->inode_page);
} else {
- update_inode_page(dn->inode);
+ ret = update_inode_page(dn->inode);
}
+ dn->node_changed = ret ? true: false;
}
int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
@@ -1175,6 +1179,11 @@ next_step:
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
+ if (unlikely(f2fs_cp_error(sbi))) {
+ pagevec_release(&pvec);
+ return -EIO;
+ }
+
/*
* flushing sequence with step:
* 0. indirect nodes
@@ -1349,7 +1358,7 @@ static int f2fs_write_node_page(struct page *page,
up_read(&sbi->node_write);
unlock_page(page);
- if (wbc->for_reclaim)
+ if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
f2fs_submit_merged_bio(sbi, NODE, WRITE);
return 0;
@@ -1440,13 +1449,10 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
if (build) {
/* do not add allocated nids */
- down_read(&nm_i->nat_tree_lock);
ne = __lookup_nat_cache(nm_i, nid);
- if (ne &&
- (!get_nat_flag(ne, IS_CHECKPOINTED) ||
+ if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
nat_get_blkaddr(ne) != NULL_ADDR))
allocated = true;
- up_read(&nm_i->nat_tree_lock);
if (allocated)
return 0;
}
@@ -1532,6 +1538,8 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
META_NAT, true);
+ down_read(&nm_i->nat_tree_lock);
+
while (1) {
struct page *page = get_current_nat_page(sbi, nid);
@@ -1560,6 +1568,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
remove_free_nid(nm_i, nid);
}
mutex_unlock(&curseg->curseg_mutex);
+ up_read(&nm_i->nat_tree_lock);
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
nm_i->ra_nid_pages, META_NAT, false);
@@ -1582,8 +1591,6 @@ retry:
/* We should not use stale free nids created by build_free_nids */
if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
- struct node_info ni;
-
f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
list_for_each_entry(i, &nm_i->free_nid_list, list)
if (i->state == NID_NEW)
@@ -1594,13 +1601,6 @@ retry:
i->state = NID_ALLOC;
nm_i->fcnt--;
spin_unlock(&nm_i->free_nid_list_lock);
-
- /* check nid is allocated already */
- get_node_info(sbi, *nid, &ni);
- if (ni.blk_addr != NULL_ADDR) {
- alloc_nid_done(sbi, *nid);
- goto retry;
- }
return true;
}
spin_unlock(&nm_i->free_nid_list_lock);
@@ -1842,14 +1842,12 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
raw_ne = nat_in_journal(sum, i);
- down_write(&nm_i->nat_tree_lock);
ne = __lookup_nat_cache(nm_i, nid);
if (!ne) {
ne = grab_nat_entry(nm_i, nid);
node_info_from_raw_nat(&ne->ni, &raw_ne);
}
__set_nat_cache_dirty(nm_i, ne);
- up_write(&nm_i->nat_tree_lock);
}
update_nats_in_cursum(sum, -i);
mutex_unlock(&curseg->curseg_mutex);
@@ -1883,7 +1881,6 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
struct f2fs_nat_block *nat_blk;
struct nat_entry *ne, *cur;
struct page *page = NULL;
- struct f2fs_nm_info *nm_i = NM_I(sbi);
/*
* there are two steps to flush nat entries:
@@ -1920,12 +1917,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
raw_ne = &nat_blk->entries[nid - start_nid];
}
raw_nat_from_node_info(raw_ne, &ne->ni);
-
- down_write(&NM_I(sbi)->nat_tree_lock);
nat_reset_flag(ne);
__clear_nat_cache_dirty(NM_I(sbi), ne);
- up_write(&NM_I(sbi)->nat_tree_lock);
-
if (nat_get_blkaddr(ne) == NULL_ADDR)
add_free_nid(sbi, nid, false);
}
@@ -1937,9 +1930,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
f2fs_bug_on(sbi, set->entry_cnt);
- down_write(&nm_i->nat_tree_lock);
radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
- up_write(&nm_i->nat_tree_lock);
kmem_cache_free(nat_entry_set_slab, set);
}
@@ -1959,6 +1950,9 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
if (!nm_i->dirty_nat_cnt)
return;
+
+ down_write(&nm_i->nat_tree_lock);
+
/*
* if there are no enough space in journal to store dirty nat
* entries, remove all entries from journal and merge them
@@ -1967,7 +1961,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
remove_nats_in_journal(sbi);
- down_write(&nm_i->nat_tree_lock);
while ((found = __gang_lookup_nat_set(nm_i,
set_idx, SETVEC_SIZE, setvec))) {
unsigned idx;
@@ -1976,12 +1969,13 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
__adjust_nat_entry_set(setvec[idx], &sets,
MAX_NAT_JENTRIES(sum));
}
- up_write(&nm_i->nat_tree_lock);
/* flush dirty nats in nat entry set */
list_for_each_entry_safe(set, tmp, &sets, set_list)
__flush_nat_entry_set(sbi, set);
+ up_write(&nm_i->nat_tree_lock);
+
f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
}
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index e4fffd2d9..d4d1f636f 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -183,7 +183,7 @@ static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start)
block_addr = (pgoff_t)(nm_i->nat_blkaddr +
(seg_off << sbi->log_blocks_per_seg << 1) +
- (block_off & ((1 << sbi->log_blocks_per_seg) - 1)));
+ (block_off & (sbi->blocks_per_seg - 1)));
if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
block_addr += sbi->blocks_per_seg;
@@ -317,7 +317,7 @@ static inline bool IS_DNODE(struct page *node_page)
return true;
}
-static inline void set_nid(struct page *p, int off, nid_t nid, bool i)
+static inline int set_nid(struct page *p, int off, nid_t nid, bool i)
{
struct f2fs_node *rn = F2FS_NODE(p);
@@ -327,7 +327,7 @@ static inline void set_nid(struct page *p, int off, nid_t nid, bool i)
rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
else
rn->in.nid[off] = cpu_to_le32(nid);
- set_page_dirty(p);
+ return set_page_dirty(p);
}
static inline nid_t get_nid(struct page *p, int off, bool i)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index cbf74f47c..589b20b86 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -168,6 +168,32 @@ static void recover_inode(struct inode *inode, struct page *page)
ino_of_node(page), name);
}
+static bool is_same_inode(struct inode *inode, struct page *ipage)
+{
+ struct f2fs_inode *ri = F2FS_INODE(ipage);
+ struct timespec disk;
+
+ if (!IS_INODE(ipage))
+ return true;
+
+ disk.tv_sec = le64_to_cpu(ri->i_ctime);
+ disk.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
+ if (timespec_compare(&inode->i_ctime, &disk) > 0)
+ return false;
+
+ disk.tv_sec = le64_to_cpu(ri->i_atime);
+ disk.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
+ if (timespec_compare(&inode->i_atime, &disk) > 0)
+ return false;
+
+ disk.tv_sec = le64_to_cpu(ri->i_mtime);
+ disk.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
+ if (timespec_compare(&inode->i_mtime, &disk) > 0)
+ return false;
+
+ return true;
+}
+
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
@@ -197,7 +223,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
goto next;
entry = get_fsync_inode(head, ino_of_node(page));
- if (!entry) {
+ if (entry) {
+ if (!is_same_inode(entry->inode, page))
+ goto next;
+ } else {
if (IS_INODE(page) && is_dent_dnode(page)) {
err = recover_inode_page(sbi, page);
if (err)
@@ -459,8 +488,7 @@ out:
return err;
}
-static int recover_data(struct f2fs_sb_info *sbi,
- struct list_head *head, int type)
+static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head)
{
unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
struct curseg_info *curseg;
@@ -469,7 +497,7 @@ static int recover_data(struct f2fs_sb_info *sbi,
block_t blkaddr;
/* get node pages in the current segment */
- curseg = CURSEG_I(sbi, type);
+ curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
while (1) {
@@ -556,7 +584,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
need_writecp = true;
/* step #2: recover data */
- err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
+ err = recover_data(sbi, &inode_list);
if (!err)
f2fs_bug_on(sbi, !list_empty(&inode_list));
out:
@@ -595,7 +623,7 @@ out:
.reason = CP_RECOVERY,
};
mutex_unlock(&sbi->cp_mutex);
- write_checkpoint(sbi, &cpc);
+ err = write_checkpoint(sbi, &cpc);
} else {
mutex_unlock(&sbi->cp_mutex);
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index f77b32584..5904a411c 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -86,6 +86,7 @@ static inline unsigned long __reverse_ffs(unsigned long word)
/*
* __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
* f2fs_set_bit makes MSB and LSB reversed in a byte.
+ * @size must be an integral multiple of the size of unsigned long.
* Example:
* MSB <--> LSB
* f2fs_set_bit(0, bitmap) => 1000 0000
@@ -95,94 +96,73 @@ static unsigned long __find_rev_next_bit(const unsigned long *addr,
unsigned long size, unsigned long offset)
{
const unsigned long *p = addr + BIT_WORD(offset);
- unsigned long result = offset & ~(BITS_PER_LONG - 1);
+ unsigned long result = size;
unsigned long tmp;
if (offset >= size)
return size;
- size -= result;
+ size -= (offset & ~(BITS_PER_LONG - 1));
offset %= BITS_PER_LONG;
- if (!offset)
- goto aligned;
-
- tmp = __reverse_ulong((unsigned char *)p);
- tmp &= ~0UL >> offset;
-
- if (size < BITS_PER_LONG)
- goto found_first;
- if (tmp)
- goto found_middle;
-
- size -= BITS_PER_LONG;
- result += BITS_PER_LONG;
- p++;
-aligned:
- while (size & ~(BITS_PER_LONG-1)) {
+
+ while (1) {
+ if (*p == 0)
+ goto pass;
+
tmp = __reverse_ulong((unsigned char *)p);
+
+ tmp &= ~0UL >> offset;
+ if (size < BITS_PER_LONG)
+ tmp &= (~0UL << (BITS_PER_LONG - size));
if (tmp)
- goto found_middle;
- result += BITS_PER_LONG;
+ goto found;
+pass:
+ if (size <= BITS_PER_LONG)
+ break;
size -= BITS_PER_LONG;
+ offset = 0;
p++;
}
- if (!size)
- return result;
-
- tmp = __reverse_ulong((unsigned char *)p);
-found_first:
- tmp &= (~0UL << (BITS_PER_LONG - size));
- if (!tmp) /* Are any bits set? */
- return result + size; /* Nope. */
-found_middle:
- return result + __reverse_ffs(tmp);
+ return result;
+found:
+ return result - size + __reverse_ffs(tmp);
}
static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
unsigned long size, unsigned long offset)
{
const unsigned long *p = addr + BIT_WORD(offset);
- unsigned long result = offset & ~(BITS_PER_LONG - 1);
+ unsigned long result = size;
unsigned long tmp;
if (offset >= size)
return size;
- size -= result;
+ size -= (offset & ~(BITS_PER_LONG - 1));
offset %= BITS_PER_LONG;
- if (!offset)
- goto aligned;
-
- tmp = __reverse_ulong((unsigned char *)p);
- tmp |= ~((~0UL << offset) >> offset);
-
- if (size < BITS_PER_LONG)
- goto found_first;
- if (tmp != ~0UL)
- goto found_middle;
-
- size -= BITS_PER_LONG;
- result += BITS_PER_LONG;
- p++;
-aligned:
- while (size & ~(BITS_PER_LONG - 1)) {
+
+ while (1) {
+ if (*p == ~0UL)
+ goto pass;
+
tmp = __reverse_ulong((unsigned char *)p);
+
+ if (offset)
+ tmp |= ~0UL << (BITS_PER_LONG - offset);
+ if (size < BITS_PER_LONG)
+ tmp |= ~0UL >> size;
if (tmp != ~0UL)
- goto found_middle;
- result += BITS_PER_LONG;
+ goto found;
+pass:
+ if (size <= BITS_PER_LONG)
+ break;
size -= BITS_PER_LONG;
+ offset = 0;
p++;
}
- if (!size)
- return result;
-
- tmp = __reverse_ulong((unsigned char *)p);
-found_first:
- tmp |= ~(~0UL << (BITS_PER_LONG - size));
- if (tmp == ~0UL) /* Are any bits zero? */
- return result + size; /* Nope. */
-found_middle:
- return result + __reverse_ffz(tmp);
+ return result;
+found:
+ return result - size + __reverse_ffz(tmp);
}
void register_inmem_page(struct inode *inode, struct page *page)
@@ -233,7 +213,7 @@ int commit_inmem_pages(struct inode *inode, bool abort)
* inode becomes free by iget_locked in f2fs_iget.
*/
if (!abort) {
- f2fs_balance_fs(sbi);
+ f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
}
@@ -257,6 +237,7 @@ int commit_inmem_pages(struct inode *inode, bool abort)
submit_bio = true;
}
} else {
+ ClearPageUptodate(cur->page);
trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP);
}
set_page_private(cur->page, 0);
@@ -281,8 +262,10 @@ int commit_inmem_pages(struct inode *inode, bool abort)
* This function balances dirty node and dentry pages.
* In addition, it controls garbage collection.
*/
-void f2fs_balance_fs(struct f2fs_sb_info *sbi)
+void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
+ if (!need)
+ return;
/*
* We should do GC or end up with checkpoint, if there are so many dirty
* dir/node pages without enough free segments.
@@ -310,8 +293,12 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
if (!available_free_memory(sbi, NAT_ENTRIES) ||
excess_prefree_segs(sbi) ||
!available_free_memory(sbi, INO_ENTRIES) ||
- jiffies > sbi->cp_expires)
+ (is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) {
+ if (test_opt(sbi, DATA_FLUSH))
+ sync_dirty_inodes(sbi, FILE_INODE);
f2fs_sync_fs(sbi->sb, true);
+ stat_inc_bg_cp_count(sbi->stat_info);
+ }
}
static int issue_flush_thread(void *data)
@@ -1134,6 +1121,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
unsigned int start_segno, end_segno;
struct cp_control cpc;
+ int err = 0;
if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
return -EINVAL;
@@ -1164,12 +1152,12 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
sbi->segs_per_sec) - 1, end_segno);
mutex_lock(&sbi->gc_mutex);
- write_checkpoint(sbi, &cpc);
+ err = write_checkpoint(sbi, &cpc);
mutex_unlock(&sbi->gc_mutex);
}
out:
range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
- return 0;
+ return err;
}
static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
@@ -1749,13 +1737,13 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
if (le32_to_cpu(nid_in_journal(sum, i)) == val)
return i;
}
- if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES)
+ if (alloc && __has_cursum_space(sum, 1, NAT_JOURNAL))
return update_nats_in_cursum(sum, 1);
} else if (type == SIT_JOURNAL) {
for (i = 0; i < sits_in_cursum(sum); i++)
if (le32_to_cpu(segno_in_journal(sum, i)) == val)
return i;
- if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES)
+ if (alloc && __has_cursum_space(sum, 1, SIT_JOURNAL))
return update_sits_in_cursum(sum, 1);
}
return -1;
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index da0d8e0b5..93606f281 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -32,7 +32,8 @@ static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
{
- return sbi->total_ext_tree + atomic_read(&sbi->total_ext_node);
+ return atomic_read(&sbi->total_zombie_tree) +
+ atomic_read(&sbi->total_ext_node);
}
unsigned long f2fs_shrink_count(struct shrinker *shrink,
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 3a65e0132..6134832ba 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -67,6 +67,7 @@ enum {
Opt_extent_cache,
Opt_noextent_cache,
Opt_noinline_data,
+ Opt_data_flush,
Opt_err,
};
@@ -91,6 +92,7 @@ static match_table_t f2fs_tokens = {
{Opt_extent_cache, "extent_cache"},
{Opt_noextent_cache, "noextent_cache"},
{Opt_noinline_data, "noinline_data"},
+ {Opt_data_flush, "data_flush"},
{Opt_err, NULL},
};
@@ -216,7 +218,8 @@ F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, cp_interval);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
@@ -235,6 +238,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(ram_thresh),
ATTR_LIST(ra_nid_pages),
ATTR_LIST(cp_interval),
+ ATTR_LIST(idle_interval),
NULL,
};
@@ -406,6 +410,9 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_noinline_data:
clear_opt(sbi, INLINE_DATA);
break;
+ case Opt_data_flush:
+ set_opt(sbi, DATA_FLUSH);
+ break;
default:
f2fs_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" or missing value",
@@ -432,6 +439,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
fi->i_current_depth = 1;
fi->i_advise = 0;
init_rwsem(&fi->i_sem);
+ INIT_LIST_HEAD(&fi->dirty_list);
INIT_LIST_HEAD(&fi->inmem_pages);
mutex_init(&fi->inmem_lock);
@@ -548,7 +556,7 @@ static void f2fs_put_super(struct super_block *sb)
* normally superblock is clean, so we need to release this.
* In addition, EIO will skip do checkpoint, we need this as well.
*/
- release_dirty_inode(sbi);
+ release_ino_entry(sbi);
release_discard_addrs(sbi);
f2fs_leave_shrinker(sbi);
@@ -566,13 +574,14 @@ static void f2fs_put_super(struct super_block *sb)
wait_for_completion(&sbi->s_kobj_unregister);
sb->s_fs_info = NULL;
- brelse(sbi->raw_super_buf);
+ kfree(sbi->raw_super);
kfree(sbi);
}
int f2fs_sync_fs(struct super_block *sb, int sync)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ int err = 0;
trace_f2fs_sync_fs(sb, sync);
@@ -582,14 +591,12 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
cpc.reason = __get_cp_reason(sbi);
mutex_lock(&sbi->gc_mutex);
- write_checkpoint(sbi, &cpc);
+ err = write_checkpoint(sbi, &cpc);
mutex_unlock(&sbi->gc_mutex);
- } else {
- f2fs_balance_fs(sbi);
}
f2fs_trace_ios(NULL, 1);
- return 0;
+ return err;
}
static int f2fs_freeze(struct super_block *sb)
@@ -686,6 +693,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",extent_cache");
else
seq_puts(seq, ",noextent_cache");
+ if (test_opt(sbi, DATA_FLUSH))
+ seq_puts(seq, ",data_flush");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);
return 0;
@@ -898,7 +907,7 @@ static const struct export_operations f2fs_export_ops = {
.get_parent = f2fs_get_parent,
};
-static loff_t max_file_size(unsigned bits)
+static loff_t max_file_blocks(void)
{
loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS);
loff_t leaf_count = ADDRS_PER_BLOCK;
@@ -914,10 +923,82 @@ static loff_t max_file_size(unsigned bits)
leaf_count *= NIDS_PER_BLOCK;
result += leaf_count;
- result <<= bits;
return result;
}
+static inline bool sanity_check_area_boundary(struct super_block *sb,
+ struct f2fs_super_block *raw_super)
+{
+ u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
+ u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr);
+ u32 sit_blkaddr = le32_to_cpu(raw_super->sit_blkaddr);
+ u32 nat_blkaddr = le32_to_cpu(raw_super->nat_blkaddr);
+ u32 ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
+ u32 main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
+ u32 segment_count_ckpt = le32_to_cpu(raw_super->segment_count_ckpt);
+ u32 segment_count_sit = le32_to_cpu(raw_super->segment_count_sit);
+ u32 segment_count_nat = le32_to_cpu(raw_super->segment_count_nat);
+ u32 segment_count_ssa = le32_to_cpu(raw_super->segment_count_ssa);
+ u32 segment_count_main = le32_to_cpu(raw_super->segment_count_main);
+ u32 segment_count = le32_to_cpu(raw_super->segment_count);
+ u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
+
+ if (segment0_blkaddr != cp_blkaddr) {
+ f2fs_msg(sb, KERN_INFO,
+ "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
+ segment0_blkaddr, cp_blkaddr);
+ return true;
+ }
+
+ if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) !=
+ sit_blkaddr) {
+ f2fs_msg(sb, KERN_INFO,
+ "Wrong CP boundary, start(%u) end(%u) blocks(%u)",
+ cp_blkaddr, sit_blkaddr,
+ segment_count_ckpt << log_blocks_per_seg);
+ return true;
+ }
+
+ if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) !=
+ nat_blkaddr) {
+ f2fs_msg(sb, KERN_INFO,
+ "Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
+ sit_blkaddr, nat_blkaddr,
+ segment_count_sit << log_blocks_per_seg);
+ return true;
+ }
+
+ if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) !=
+ ssa_blkaddr) {
+ f2fs_msg(sb, KERN_INFO,
+ "Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
+ nat_blkaddr, ssa_blkaddr,
+ segment_count_nat << log_blocks_per_seg);
+ return true;
+ }
+
+ if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) !=
+ main_blkaddr) {
+ f2fs_msg(sb, KERN_INFO,
+ "Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
+ ssa_blkaddr, main_blkaddr,
+ segment_count_ssa << log_blocks_per_seg);
+ return true;
+ }
+
+ if (main_blkaddr + (segment_count_main << log_blocks_per_seg) !=
+ segment0_blkaddr + (segment_count << log_blocks_per_seg)) {
+ f2fs_msg(sb, KERN_INFO,
+ "Wrong MAIN_AREA boundary, start(%u) end(%u) blocks(%u)",
+ main_blkaddr,
+ segment0_blkaddr + (segment_count << log_blocks_per_seg),
+ segment_count_main << log_blocks_per_seg);
+ return true;
+ }
+
+ return false;
+}
+
static int sanity_check_raw_super(struct super_block *sb,
struct f2fs_super_block *raw_super)
{
@@ -947,6 +1028,14 @@ static int sanity_check_raw_super(struct super_block *sb,
return 1;
}
+ /* check log blocks per segment */
+ if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) {
+ f2fs_msg(sb, KERN_INFO,
+ "Invalid log blocks per segment (%u)\n",
+ le32_to_cpu(raw_super->log_blocks_per_seg));
+ return 1;
+ }
+
/* Currently, support 512/1024/2048/4096 bytes sector size */
if (le32_to_cpu(raw_super->log_sectorsize) >
F2FS_MAX_LOG_SECTOR_SIZE ||
@@ -965,6 +1054,23 @@ static int sanity_check_raw_super(struct super_block *sb,
le32_to_cpu(raw_super->log_sectorsize));
return 1;
}
+
+ /* check reserved ino info */
+ if (le32_to_cpu(raw_super->node_ino) != 1 ||
+ le32_to_cpu(raw_super->meta_ino) != 2 ||
+ le32_to_cpu(raw_super->root_ino) != 3) {
+ f2fs_msg(sb, KERN_INFO,
+ "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
+ le32_to_cpu(raw_super->node_ino),
+ le32_to_cpu(raw_super->meta_ino),
+ le32_to_cpu(raw_super->root_ino));
+ return 1;
+ }
+
+ /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
+ if (sanity_check_area_boundary(sb, raw_super))
+ return 1;
+
return 0;
}
@@ -1018,7 +1124,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
atomic_set(&sbi->nr_pages[i], 0);
sbi->dir_level = DEF_DIR_LEVEL;
- sbi->cp_interval = DEF_CP_INTERVAL;
+ sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
+ sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
clear_sbi_flag(sbi, SBI_NEED_FSCK);
INIT_LIST_HEAD(&sbi->s_list);
@@ -1032,111 +1139,114 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
*/
static int read_raw_super_block(struct super_block *sb,
struct f2fs_super_block **raw_super,
- struct buffer_head **raw_super_buf,
- int *recovery)
+ int *valid_super_block, int *recovery)
{
int block = 0;
- struct buffer_head *buffer;
- struct f2fs_super_block *super;
+ struct buffer_head *bh;
+ struct f2fs_super_block *super, *buf;
int err = 0;
+ super = kzalloc(sizeof(struct f2fs_super_block), GFP_KERNEL);
+ if (!super)
+ return -ENOMEM;
retry:
- buffer = sb_bread(sb, block);
- if (!buffer) {
+ bh = sb_bread(sb, block);
+ if (!bh) {
*recovery = 1;
f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
block + 1);
- if (block == 0) {
- block++;
- goto retry;
- } else {
- err = -EIO;
- goto out;
- }
+ err = -EIO;
+ goto next;
}
- super = (struct f2fs_super_block *)
- ((char *)(buffer)->b_data + F2FS_SUPER_OFFSET);
+ buf = (struct f2fs_super_block *)(bh->b_data + F2FS_SUPER_OFFSET);
/* sanity checking of raw super */
- if (sanity_check_raw_super(sb, super)) {
- brelse(buffer);
+ if (sanity_check_raw_super(sb, buf)) {
+ brelse(bh);
*recovery = 1;
f2fs_msg(sb, KERN_ERR,
"Can't find valid F2FS filesystem in %dth superblock",
block + 1);
- if (block == 0) {
- block++;
- goto retry;
- } else {
- err = -EINVAL;
- goto out;
- }
+ err = -EINVAL;
+ goto next;
}
if (!*raw_super) {
- *raw_super_buf = buffer;
+ memcpy(super, buf, sizeof(*super));
+ *valid_super_block = block;
*raw_super = super;
- } else {
- /* already have a valid superblock */
- brelse(buffer);
}
+ brelse(bh);
+next:
/* check the validity of the second superblock */
if (block == 0) {
block++;
goto retry;
}
-out:
/* No valid superblock */
- if (!*raw_super)
+ if (!*raw_super) {
+ kfree(super);
return err;
+ }
return 0;
}
+static int __f2fs_commit_super(struct f2fs_sb_info *sbi, int block)
+{
+ struct f2fs_super_block *super = F2FS_RAW_SUPER(sbi);
+ struct buffer_head *bh;
+ int err;
+
+ bh = sb_getblk(sbi->sb, block);
+ if (!bh)
+ return -EIO;
+
+ lock_buffer(bh);
+ memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super));
+ set_buffer_uptodate(bh);
+ set_buffer_dirty(bh);
+ unlock_buffer(bh);
+
+ /* it's rare case, we can do fua all the time */
+ err = __sync_dirty_buffer(bh, WRITE_FLUSH_FUA);
+ brelse(bh);
+
+ return err;
+}
+
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
{
- struct buffer_head *sbh = sbi->raw_super_buf;
- sector_t block = sbh->b_blocknr;
int err;
/* write back-up superblock first */
- sbh->b_blocknr = block ? 0 : 1;
- mark_buffer_dirty(sbh);
- err = sync_dirty_buffer(sbh);
-
- sbh->b_blocknr = block;
+ err = __f2fs_commit_super(sbi, sbi->valid_super_block ? 0 : 1);
/* if we are in recovery path, skip writing valid superblock */
if (recover || err)
- goto out;
+ return err;
/* write current valid superblock */
- mark_buffer_dirty(sbh);
- err = sync_dirty_buffer(sbh);
-out:
- clear_buffer_write_io_error(sbh);
- set_buffer_uptodate(sbh);
- return err;
+ return __f2fs_commit_super(sbi, sbi->valid_super_block);
}
static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
{
struct f2fs_sb_info *sbi;
struct f2fs_super_block *raw_super;
- struct buffer_head *raw_super_buf;
struct inode *root;
long err;
bool retry = true, need_fsck = false;
char *options = NULL;
- int recovery, i;
+ int recovery, i, valid_super_block;
try_onemore:
err = -EINVAL;
raw_super = NULL;
- raw_super_buf = NULL;
+ valid_super_block = -1;
recovery = 0;
/* allocate memory for f2fs-specific super block info */
@@ -1150,7 +1260,8 @@ try_onemore:
goto free_sbi;
}
- err = read_raw_super_block(sb, &raw_super, &raw_super_buf, &recovery);
+ err = read_raw_super_block(sb, &raw_super, &valid_super_block,
+ &recovery);
if (err)
goto free_sbi;
@@ -1167,7 +1278,9 @@ try_onemore:
if (err)
goto free_options;
- sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize));
+ sbi->max_file_blocks = max_file_blocks();
+ sb->s_maxbytes = sbi->max_file_blocks <<
+ le32_to_cpu(raw_super->log_blocksize);
sb->s_max_links = F2FS_LINK_MAX;
get_random_bytes(&sbi->s_next_generation, sizeof(u32));
@@ -1183,7 +1296,7 @@ try_onemore:
/* init f2fs-specific super block info */
sbi->sb = sb;
sbi->raw_super = raw_super;
- sbi->raw_super_buf = raw_super_buf;
+ sbi->valid_super_block = valid_super_block;
mutex_init(&sbi->gc_mutex);
mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
@@ -1236,8 +1349,10 @@ try_onemore:
le64_to_cpu(sbi->ckpt->valid_block_count);
sbi->last_valid_block_count = sbi->total_valid_block_count;
sbi->alloc_valid_block_count = 0;
- INIT_LIST_HEAD(&sbi->dir_inode_list);
- spin_lock_init(&sbi->dir_inode_lock);
+ for (i = 0; i < NR_INODE_TYPE; i++) {
+ INIT_LIST_HEAD(&sbi->inode_list[i]);
+ spin_lock_init(&sbi->inode_lock[i]);
+ }
init_extent_cache_info(sbi);
@@ -1355,12 +1470,14 @@ try_onemore:
f2fs_commit_super(sbi, true);
}
- sbi->cp_expires = round_jiffies_up(jiffies);
-
+ f2fs_update_time(sbi, CP_TIME);
+ f2fs_update_time(sbi, REQ_TIME);
return 0;
free_kobj:
kobject_del(&sbi->s_kobj);
+ kobject_put(&sbi->s_kobj);
+ wait_for_completion(&sbi->s_kobj_unregister);
free_proc:
if (sbi->s_proc) {
remove_proc_entry("segment_info", sbi->s_proc);
@@ -1387,7 +1504,7 @@ free_meta_inode:
free_options:
kfree(options);
free_sb_buf:
- brelse(raw_super_buf);
+ kfree(raw_super);
free_sbi:
kfree(sbi);
@@ -1424,8 +1541,9 @@ MODULE_ALIAS_FS("f2fs");
static int __init init_inodecache(void)
{
- f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache",
- sizeof(struct f2fs_inode_info));
+ f2fs_inode_cachep = kmem_cache_create("f2fs_inode_cache",
+ sizeof(struct f2fs_inode_info), 0,
+ SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT, NULL);
if (!f2fs_inode_cachep)
return -ENOMEM;
return 0;
@@ -1478,10 +1596,14 @@ static int __init init_f2fs_fs(void)
err = register_filesystem(&f2fs_fs_type);
if (err)
goto free_shrinker;
- f2fs_create_root_stats();
+ err = f2fs_create_root_stats();
+ if (err)
+ goto free_filesystem;
f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
return 0;
+free_filesystem:
+ unregister_filesystem(&f2fs_fs_type);
free_shrinker:
unregister_shrinker(&f2fs_shrinker_info);
free_crypto:
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 862368a32..10f1e784f 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -25,38 +25,6 @@
#include "f2fs.h"
#include "xattr.h"
-static size_t f2fs_xattr_generic_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t len)
-{
- struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
- int total_len, prefix_len;
-
- switch (handler->flags) {
- case F2FS_XATTR_INDEX_USER:
- if (!test_opt(sbi, XATTR_USER))
- return -EOPNOTSUPP;
- break;
- case F2FS_XATTR_INDEX_TRUSTED:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- break;
- case F2FS_XATTR_INDEX_SECURITY:
- break;
- default:
- return -EINVAL;
- }
-
- prefix_len = strlen(handler->prefix);
- total_len = prefix_len + len + 1;
- if (list && total_len <= list_size) {
- memcpy(list, handler->prefix, prefix_len);
- memcpy(list + prefix_len, name, len);
- list[prefix_len + len] = '\0';
- }
- return total_len;
-}
-
static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name, void *buffer,
size_t size)
@@ -77,8 +45,6 @@ static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
default:
return -EINVAL;
}
- if (strcmp(name, "") == 0)
- return -EINVAL;
return f2fs_getxattr(d_inode(dentry), handler->flags, name,
buffer, size, NULL);
}
@@ -103,24 +69,20 @@ static int f2fs_xattr_generic_set(const struct xattr_handler *handler,
default:
return -EINVAL;
}
- if (strcmp(name, "") == 0)
- return -EINVAL;
-
return f2fs_setxattr(d_inode(dentry), handler->flags, name,
value, size, NULL, flags);
}
-static size_t f2fs_xattr_advise_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t len)
+static bool f2fs_xattr_user_list(struct dentry *dentry)
{
- const char *xname = F2FS_SYSTEM_ADVISE_PREFIX;
- size_t size;
+ struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
- size = strlen(xname) + 1;
- if (list && size <= list_size)
- memcpy(list, xname, size);
- return size;
+ return test_opt(sbi, XATTR_USER);
+}
+
+static bool f2fs_xattr_trusted_list(struct dentry *dentry)
+{
+ return capable(CAP_SYS_ADMIN);
}
static int f2fs_xattr_advise_get(const struct xattr_handler *handler,
@@ -129,9 +91,6 @@ static int f2fs_xattr_advise_get(const struct xattr_handler *handler,
{
struct inode *inode = d_inode(dentry);
- if (strcmp(name, "") != 0)
- return -EINVAL;
-
if (buffer)
*((char *)buffer) = F2FS_I(inode)->i_advise;
return sizeof(char);
@@ -143,8 +102,6 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
{
struct inode *inode = d_inode(dentry);
- if (strcmp(name, "") != 0)
- return -EINVAL;
if (!inode_owner_or_capable(inode))
return -EPERM;
if (value == NULL)
@@ -183,7 +140,7 @@ int f2fs_init_security(struct inode *inode, struct inode *dir,
const struct xattr_handler f2fs_xattr_user_handler = {
.prefix = XATTR_USER_PREFIX,
.flags = F2FS_XATTR_INDEX_USER,
- .list = f2fs_xattr_generic_list,
+ .list = f2fs_xattr_user_list,
.get = f2fs_xattr_generic_get,
.set = f2fs_xattr_generic_set,
};
@@ -191,15 +148,14 @@ const struct xattr_handler f2fs_xattr_user_handler = {
const struct xattr_handler f2fs_xattr_trusted_handler = {
.prefix = XATTR_TRUSTED_PREFIX,
.flags = F2FS_XATTR_INDEX_TRUSTED,
- .list = f2fs_xattr_generic_list,
+ .list = f2fs_xattr_trusted_list,
.get = f2fs_xattr_generic_get,
.set = f2fs_xattr_generic_set,
};
const struct xattr_handler f2fs_xattr_advise_handler = {
- .prefix = F2FS_SYSTEM_ADVISE_PREFIX,
+ .name = F2FS_SYSTEM_ADVISE_NAME,
.flags = F2FS_XATTR_INDEX_ADVISE,
- .list = f2fs_xattr_advise_list,
.get = f2fs_xattr_advise_get,
.set = f2fs_xattr_advise_set,
};
@@ -207,7 +163,6 @@ const struct xattr_handler f2fs_xattr_advise_handler = {
const struct xattr_handler f2fs_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.flags = F2FS_XATTR_INDEX_SECURITY,
- .list = f2fs_xattr_generic_list,
.get = f2fs_xattr_generic_get,
.set = f2fs_xattr_generic_set,
};
@@ -455,20 +410,27 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
list_for_each_xattr(entry, base_addr) {
const struct xattr_handler *handler =
f2fs_xattr_handler(entry->e_name_index);
+ const char *prefix;
+ size_t prefix_len;
size_t size;
- if (!handler)
+ if (!handler || (handler->list && !handler->list(dentry)))
continue;
- size = handler->list(handler, dentry, buffer, rest,
- entry->e_name, entry->e_name_len);
- if (buffer && size > rest) {
- error = -ERANGE;
- goto cleanup;
+ prefix = handler->prefix ?: handler->name;
+ prefix_len = strlen(prefix);
+ size = prefix_len + entry->e_name_len + 1;
+ if (buffer) {
+ if (size > rest) {
+ error = -ERANGE;
+ goto cleanup;
+ }
+ memcpy(buffer, prefix, prefix_len);
+ buffer += prefix_len;
+ memcpy(buffer, entry->e_name, entry->e_name_len);
+ buffer += entry->e_name_len;
+ *buffer++ = 0;
}
-
- if (buffer)
- buffer += size;
rest -= size;
}
error = buffer_size - rest;
@@ -609,7 +571,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
if (ipage)
return __f2fs_setxattr(inode, index, name, value,
size, ipage, flags);
- f2fs_balance_fs(sbi);
+ f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
/* protect xattr_ver */
@@ -618,5 +580,6 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
up_write(&F2FS_I(inode)->i_sem);
f2fs_unlock_op(sbi);
+ f2fs_update_time(sbi, REQ_TIME);
return err;
}
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index 71a7100d5..79dccc825 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -27,7 +27,7 @@
#define F2FS_XATTR_REFCOUNT_MAX 1024
/* Name indexes */
-#define F2FS_SYSTEM_ADVISE_PREFIX "system.advise"
+#define F2FS_SYSTEM_ADVISE_NAME "system.advise"
#define F2FS_XATTR_INDEX_USER 1
#define F2FS_XATTR_INDEX_POSIX_ACL_ACCESS 2
#define F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT 3
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 93fc62232..5d3849215 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -301,15 +301,59 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)
return dclus;
}
-int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
- unsigned long *mapped_blocks, int create)
+int fat_get_mapped_cluster(struct inode *inode, sector_t sector,
+ sector_t last_block,
+ unsigned long *mapped_blocks, sector_t *bmap)
{
struct super_block *sb = inode->i_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ int cluster, offset;
+
+ cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits);
+ offset = sector & (sbi->sec_per_clus - 1);
+ cluster = fat_bmap_cluster(inode, cluster);
+ if (cluster < 0)
+ return cluster;
+ else if (cluster) {
+ *bmap = fat_clus_to_blknr(sbi, cluster) + offset;
+ *mapped_blocks = sbi->sec_per_clus - offset;
+ if (*mapped_blocks > last_block - sector)
+ *mapped_blocks = last_block - sector;
+ }
+
+ return 0;
+}
+
+static int is_exceed_eof(struct inode *inode, sector_t sector,
+ sector_t *last_block, int create)
+{
+ struct super_block *sb = inode->i_sb;
const unsigned long blocksize = sb->s_blocksize;
const unsigned char blocksize_bits = sb->s_blocksize_bits;
+
+ *last_block = (i_size_read(inode) + (blocksize - 1)) >> blocksize_bits;
+ if (sector >= *last_block) {
+ if (!create)
+ return 1;
+
+ /*
+ * ->mmu_private can access on only allocation path.
+ * (caller must hold ->i_mutex)
+ */
+ *last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1))
+ >> blocksize_bits;
+ if (sector >= *last_block)
+ return 1;
+ }
+
+ return 0;
+}
+
+int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
+ unsigned long *mapped_blocks, int create, bool from_bmap)
+{
+ struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
sector_t last_block;
- int cluster, offset;
*phys = 0;
*mapped_blocks = 0;
@@ -321,31 +365,16 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
return 0;
}
- last_block = (i_size_read(inode) + (blocksize - 1)) >> blocksize_bits;
- if (sector >= last_block) {
- if (!create)
+ if (!from_bmap) {
+ if (is_exceed_eof(inode, sector, &last_block, create))
return 0;
-
- /*
- * ->mmu_private can access on only allocation path.
- * (caller must hold ->i_mutex)
- */
- last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1))
- >> blocksize_bits;
+ } else {
+ last_block = inode->i_blocks >>
+ (inode->i_sb->s_blocksize_bits - 9);
if (sector >= last_block)
return 0;
}
- cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits);
- offset = sector & (sbi->sec_per_clus - 1);
- cluster = fat_bmap_cluster(inode, cluster);
- if (cluster < 0)
- return cluster;
- else if (cluster) {
- *phys = fat_clus_to_blknr(sbi, cluster) + offset;
- *mapped_blocks = sbi->sec_per_clus - offset;
- if (*mapped_blocks > last_block - sector)
- *mapped_blocks = last_block - sector;
- }
- return 0;
+ return fat_get_mapped_cluster(inode, sector, last_block, mapped_blocks,
+ phys);
}
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 8b2127ffb..d0b95c950 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -91,7 +91,7 @@ next:
*bh = NULL;
iblock = *pos >> sb->s_blocksize_bits;
- err = fat_bmap(dir, iblock, &phys, &mapped_blocks, 0);
+ err = fat_bmap(dir, iblock, &phys, &mapped_blocks, 0, false);
if (err || !phys)
return -1; /* beyond EOF or error */
@@ -769,7 +769,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *file,
buf.dirent = dirent;
buf.result = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
buf.ctx.pos = file->f_pos;
ret = -ENOENT;
if (!IS_DEADDIR(inode)) {
@@ -777,7 +777,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *file,
short_only, both ? &buf : NULL);
file->f_pos = buf.ctx.pos;
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (ret >= 0)
ret = buf.result;
return ret;
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index be5e15323..e6b764a17 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -87,7 +87,7 @@ struct msdos_sb_info {
unsigned int vol_id; /*volume ID*/
int fatent_shift;
- struct fatent_operations *fatent_ops;
+ const struct fatent_operations *fatent_ops;
struct inode *fat_inode;
struct inode *fsinfo_inode;
@@ -285,8 +285,11 @@ static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len)
extern void fat_cache_inval_inode(struct inode *inode);
extern int fat_get_cluster(struct inode *inode, int cluster,
int *fclus, int *dclus);
+extern int fat_get_mapped_cluster(struct inode *inode, sector_t sector,
+ sector_t last_block,
+ unsigned long *mapped_blocks, sector_t *bmap);
extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
- unsigned long *mapped_blocks, int create);
+ unsigned long *mapped_blocks, int create, bool from_bmap);
/* fat/dir.c */
extern const struct file_operations fat_dir_operations;
@@ -384,6 +387,7 @@ static inline unsigned long fat_dir_hash(int logstart)
{
return hash_32(logstart, FAT_HASH_BITS);
}
+extern int fat_add_cluster(struct inode *inode);
/* fat/misc.c */
extern __printf(3, 4) __cold
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 822655713..1d9a8c4e9 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -99,7 +99,7 @@ err:
static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent,
int offset, sector_t blocknr)
{
- struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops;
+ const struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops;
WARN_ON(blocknr < MSDOS_SB(sb)->fat_start);
fatent->fat_inode = MSDOS_SB(sb)->fat_inode;
@@ -246,7 +246,7 @@ static int fat32_ent_next(struct fat_entry *fatent)
return 0;
}
-static struct fatent_operations fat12_ops = {
+static const struct fatent_operations fat12_ops = {
.ent_blocknr = fat12_ent_blocknr,
.ent_set_ptr = fat12_ent_set_ptr,
.ent_bread = fat12_ent_bread,
@@ -255,7 +255,7 @@ static struct fatent_operations fat12_ops = {
.ent_next = fat12_ent_next,
};
-static struct fatent_operations fat16_ops = {
+static const struct fatent_operations fat16_ops = {
.ent_blocknr = fat_ent_blocknr,
.ent_set_ptr = fat16_ent_set_ptr,
.ent_bread = fat_ent_bread,
@@ -264,7 +264,7 @@ static struct fatent_operations fat16_ops = {
.ent_next = fat16_ent_next,
};
-static struct fatent_operations fat32_ops = {
+static const struct fatent_operations fat32_ops = {
.ent_blocknr = fat_ent_blocknr,
.ent_set_ptr = fat32_ent_set_ptr,
.ent_bread = fat_ent_bread,
@@ -320,7 +320,7 @@ static inline int fat_ent_update_ptr(struct super_block *sb,
int offset, sector_t blocknr)
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
- struct fatent_operations *ops = sbi->fatent_ops;
+ const struct fatent_operations *ops = sbi->fatent_ops;
struct buffer_head **bhs = fatent->bhs;
/* Is this fatent's blocks including this entry? */
@@ -349,7 +349,7 @@ int fat_ent_read(struct inode *inode, struct fat_entry *fatent, int entry)
{
struct super_block *sb = inode->i_sb;
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
- struct fatent_operations *ops = sbi->fatent_ops;
+ const struct fatent_operations *ops = sbi->fatent_ops;
int err, offset;
sector_t blocknr;
@@ -407,7 +407,7 @@ int fat_ent_write(struct inode *inode, struct fat_entry *fatent,
int new, int wait)
{
struct super_block *sb = inode->i_sb;
- struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops;
+ const struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops;
int err;
ops->ent_put(fatent, new);
@@ -432,7 +432,7 @@ static inline int fat_ent_next(struct msdos_sb_info *sbi,
static inline int fat_ent_read_block(struct super_block *sb,
struct fat_entry *fatent)
{
- struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops;
+ const struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops;
sector_t blocknr;
int offset;
@@ -463,7 +463,7 @@ int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster)
{
struct super_block *sb = inode->i_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
- struct fatent_operations *ops = sbi->fatent_ops;
+ const struct fatent_operations *ops = sbi->fatent_ops;
struct fat_entry fatent, prev_ent;
struct buffer_head *bhs[MAX_BUF_PER_PAGE];
int i, count, err, nr_bhs, idx_clus;
@@ -551,7 +551,7 @@ int fat_free_clusters(struct inode *inode, int cluster)
{
struct super_block *sb = inode->i_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
- struct fatent_operations *ops = sbi->fatent_ops;
+ const struct fatent_operations *ops = sbi->fatent_ops;
struct fat_entry fatent;
struct buffer_head *bhs[MAX_BUF_PER_PAGE];
int i, err, nr_bhs;
@@ -636,7 +636,7 @@ EXPORT_SYMBOL_GPL(fat_free_clusters);
static void fat_ent_reada(struct super_block *sb, struct fat_entry *fatent,
unsigned long reada_blocks)
{
- struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops;
+ const struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops;
sector_t blocknr;
int i, offset;
@@ -649,7 +649,7 @@ static void fat_ent_reada(struct super_block *sb, struct fat_entry *fatent,
int fat_count_free_clusters(struct super_block *sb)
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
- struct fatent_operations *ops = sbi->fatent_ops;
+ const struct fatent_operations *ops = sbi->fatent_ops;
struct fat_entry fatent;
unsigned long reada_blocks, reada_mask, cur_block;
int err = 0, free;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index a08f10399..f70185668 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -14,15 +14,19 @@
#include <linux/backing-dev.h>
#include <linux/fsnotify.h>
#include <linux/security.h>
+#include <linux/falloc.h>
#include "fat.h"
+static long fat_fallocate(struct file *file, int mode,
+ loff_t offset, loff_t len);
+
static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr)
{
u32 attr;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
attr = fat_make_attrs(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return put_user(attr, user_attr);
}
@@ -43,7 +47,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
err = mnt_want_write_file(file);
if (err)
goto out;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* ATTR_VOLUME and ATTR_DIR cannot be changed; this also
@@ -105,7 +109,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
fat_save_attrs(inode, attr);
mark_inode_dirty(inode);
out_unlock_inode:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
mnt_drop_write_file(file);
out:
return err;
@@ -177,6 +181,7 @@ const struct file_operations fat_file_operations = {
#endif
.fsync = fat_file_fsync,
.splice_read = generic_file_splice_read,
+ .fallocate = fat_fallocate,
};
static int fat_cont_expand(struct inode *inode, loff_t size)
@@ -215,6 +220,62 @@ out:
return err;
}
+/*
+ * Preallocate space for a file. This implements fat's fallocate file
+ * operation, which gets called from the sys_fallocate system call. User
+ * space requests len bytes at offset. If FALLOC_FL_KEEP_SIZE is set
+ * we just allocate clusters without zeroing them out. Otherwise we
+ * allocate and zero out clusters via an expanding truncate.
+ *
+ * Returns 0 on success, including the case where the requested range is
+ * already covered (by the on-disk allocation for FALLOC_FL_KEEP_SIZE, or
+ * by i_size otherwise); -EOPNOTSUPP for any mode bit other than
+ * FALLOC_FL_KEEP_SIZE or for anything that is not a regular file; or the
+ * error returned by fat_add_cluster() / fat_cont_expand().
+ *
+ * The inode lock is taken here and held around the allocation itself.
+ */
+static long fat_fallocate(struct file *file, int mode,
+ loff_t offset, loff_t len)
+{
+ int nr_cluster; /* Number of clusters to be allocated */
+ loff_t mm_bytes; /* Number of bytes to be allocated for file */
+ loff_t ondisksize; /* block aligned on-disk size in bytes */
+ struct inode *inode = file->f_mapping->host;
+ struct super_block *sb = inode->i_sb;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ int err = 0;
+
+ /* No support for hole punch or other fallocate flags. */
+ if (mode & ~FALLOC_FL_KEEP_SIZE)
+ return -EOPNOTSUPP;
+
+ /* Only regular files are supported (directories etc. are rejected). */
+ if (!S_ISREG(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ inode_lock(inode);
+ if (mode & FALLOC_FL_KEEP_SIZE) {
+ ondisksize = inode->i_blocks << 9; /* i_blocks scaled by 512 to get bytes */
+ if ((offset + len) <= ondisksize)
+ goto error; /* range already allocated; err is still 0 */
+
+ /*
+ * First compute the number of clusters to be allocated: the
+ * bytes missing beyond ondisksize, rounded up to whole clusters.
+ */
+ mm_bytes = offset + len - ondisksize;
+ nr_cluster = (mm_bytes + (sbi->cluster_size - 1)) >>
+ sbi->cluster_bits;
+
+ /* Start the allocation. We are not zeroing out the clusters. */
+ while (nr_cluster-- > 0) {
+ err = fat_add_cluster(inode);
+ if (err)
+ goto error; /* stop at the first failed cluster allocation */
+ }
+ } else {
+ if ((offset + len) <= i_size_read(inode))
+ goto error; /* nothing to grow; err is still 0 */
+
+ /* This is just an expanding truncate */
+ err = fat_cont_expand(inode, (offset + len));
+ }
+
+error: /* common exit path; also reached on success with err == 0 */
+ inode_unlock(inode);
+ return err;
+}
+
/* Free all clusters after the skip'th cluster. */
static int fat_free(struct inode *inode, int skip)
{
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 509411dd3..a55990521 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -93,7 +93,7 @@ static struct fat_floppy_defaults {
},
};
-static int fat_add_cluster(struct inode *inode)
+int fat_add_cluster(struct inode *inode)
{
int err, cluster;
@@ -115,10 +115,10 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
struct super_block *sb = inode->i_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
unsigned long mapped_blocks;
- sector_t phys;
+ sector_t phys, last_block;
int err, offset;
- err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
+ err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create, false);
if (err)
return err;
if (phys) {
@@ -135,8 +135,14 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
return -EIO;
}
+ last_block = inode->i_blocks >> (sb->s_blocksize_bits - 9);
offset = (unsigned long)iblock & (sbi->sec_per_clus - 1);
- if (!offset) {
+ /*
+ * allocate a cluster according to the following.
+ * 1) no more available blocks
+ * 2) not part of fallocate region
+ */
+ if (!offset && !(iblock < last_block)) {
/* TODO: multiple cluster allocation would be desirable. */
err = fat_add_cluster(inode);
if (err)
@@ -148,7 +154,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
*max_blocks = min(mapped_blocks, *max_blocks);
MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits;
- err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
+ err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create, false);
if (err)
return err;
@@ -273,13 +279,38 @@ static ssize_t fat_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
return ret;
}
+static int fat_get_block_bmap(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{ /* Read-only get_block callback handed to generic_block_bmap() by _fat_bmap(). */
+ struct super_block *sb = inode->i_sb;
+ unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits; /* blocks the caller asked for */
+ int err;
+ sector_t bmap;
+ unsigned long mapped_blocks;
+
+ BUG_ON(create != 0); /* this callback never allocates */
+
+ /* NOTE(review): last argument is true only here; other visible callers pass false -- confirm its meaning in fat_bmap(). */
+ err = fat_bmap(inode, iblock, &bmap, &mapped_blocks, create, true);
+ if (err)
+ return err;
+
+ if (bmap) { /* a non-zero result means the block is mapped */
+ map_bh(bh_result, sb, bmap);
+ max_blocks = min(mapped_blocks, max_blocks);
+ }
+
+ bh_result->b_size = max_blocks << sb->s_blocksize_bits; /* report the (possibly clamped) length in bytes */
+
+ return 0; /* bh_result is left unmapped when fat_bmap() found no block */
+}
+
static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
{
sector_t blocknr;
/* fat_get_cluster() assumes the requested blocknr isn't truncated. */
down_read(&MSDOS_I(mapping->host)->truncate_lock);
- blocknr = generic_block_bmap(mapping, block, fat_get_block);
+ blocknr = generic_block_bmap(mapping, block, fat_get_block_bmap);
up_read(&MSDOS_I(mapping->host)->truncate_lock);
return blocknr;
@@ -449,6 +480,24 @@ static int fat_calc_dir_size(struct inode *inode)
return 0;
}
+static int fat_validate_dir(struct inode *dir)
+{ /* Sanity-check a directory inode: returns 0 if it looks valid, -EIO after reporting via fat_fs_error() otherwise. */
+ struct super_block *sb = dir->i_sb;
+
+ if (dir->i_nlink < 2) {
+ /* A directory should have at least the "." and ".." entries. */
+ fat_fs_error(sb, "corrupted directory (invalid entries)");
+ return -EIO;
+ }
+ if (MSDOS_I(dir)->i_start == 0 ||
+ MSDOS_I(dir)->i_start == MSDOS_SB(sb)->root_cluster) {
+ /*
+ * A directory should point to a valid start cluster: i_start may
+ * be neither 0 nor the root directory's cluster.
+ */
+ fat_fs_error(sb, "corrupted directory (invalid i_start)");
+ return -EIO;
+ }
+ return 0;
+}
+
/* doesn't deal with root inode */
int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
{
@@ -475,6 +524,10 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
MSDOS_I(inode)->mmu_private = inode->i_size;
set_nlink(inode, fat_subdirs(inode));
+
+ error = fat_validate_dir(inode);
+ if (error < 0)
+ return error;
} else { /* not a directory */
inode->i_generation |= 1;
inode->i_mode = fat_make_mode(sbi, de->attr,
@@ -553,13 +606,43 @@ out:
EXPORT_SYMBOL_GPL(fat_build_inode);
+static int __fat_write_inode(struct inode *inode, int wait);
+
+static void fat_free_eofblocks(struct inode *inode)
+{
+ /*
+ * Release unwritten fallocated blocks on inode eviction: if the
+ * on-disk allocation (i_blocks << 9 bytes) extends past mmu_private
+ * rounded up to a whole cluster, truncate the cluster chain back to
+ * mmu_private.
+ */
+ if ((inode->i_blocks << 9) >
+ round_up(MSDOS_I(inode)->mmu_private,
+ MSDOS_SB(inode->i_sb)->cluster_size)) {
+ int err;
+
+ fat_truncate_blocks(inode, MSDOS_I(inode)->mmu_private);
+ /* Fallocate results in updating the i_start/i_logstart
+ * for a zero-byte file. So, restore the original state
+ * during evict and commit it, to avoid any corruption
+ * on the next access to the cluster chain for the file.
+ */
+ err = __fat_write_inode(inode, inode_needs_sync(inode));
+ if (err) { /* nothing more can be done here; only warn */
+ fat_msg(inode->i_sb, KERN_WARNING, "Failed to "
+ "update on disk inode for unused "
+ "fallocated blocks, inode could be "
+ "corrupted. Please run fsck");
+ }
+
+ }
+}
+
static void fat_evict_inode(struct inode *inode)
{
truncate_inode_pages_final(&inode->i_data);
if (!inode->i_nlink) {
inode->i_size = 0;
fat_truncate_blocks(inode, 0);
- }
+ } else
+ fat_free_eofblocks(inode);
+
invalidate_inode_buffers(inode);
clear_inode(inode);
fat_cache_inval_inode(inode);
@@ -677,7 +760,7 @@ static int __init fat_init_inodecache(void)
fat_inode_cachep = kmem_cache_create("fat_inode_cache",
sizeof(struct msdos_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
if (fat_inode_cachep == NULL)
return -ENOMEM;
@@ -1146,7 +1229,12 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
case Opt_time_offset:
if (match_int(&args[0], &option))
return -EINVAL;
- if (option < -12 * 60 || option > 12 * 60)
+ /*
+ * GMT+-12 zones may have DST corrections so at least
+ * 13 hours difference is needed. Make the limit 24
+ * just in case someone invents something unusual.
+ */
+ if (option < -24 * 60 || option > 24 * 60)
return -EINVAL;
opts->tz_set = 1;
opts->time_offset = option;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 8abb9f814..350a2c8cf 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -29,7 +29,7 @@
#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
-int setfl(int fd, struct file * filp, unsigned long arg)
+static int setfl(int fd, struct file * filp, unsigned long arg)
{
struct inode * inode = file_inode(filp);
int error = 0;
@@ -51,7 +51,8 @@ int setfl(int fd, struct file * filp, unsigned long arg)
if (arg & O_NDELAY)
arg |= O_NONBLOCK;
- if (arg & O_DIRECT) {
+ /* Pipe packetized mode is controlled by O_DIRECT flag */
+ if (!S_ISFIFO(filp->f_inode->i_mode) && (arg & O_DIRECT)) {
if (!filp->f_mapping || !filp->f_mapping->a_ops ||
!filp->f_mapping->a_ops->direct_IO)
return -EINVAL;
@@ -59,8 +60,6 @@ int setfl(int fd, struct file * filp, unsigned long arg)
if (filp->f_op->check_flags)
error = filp->f_op->check_flags(arg);
- if (!error && filp->f_op->setfl)
- error = filp->f_op->setfl(filp, arg);
if (error)
return error;
@@ -81,7 +80,6 @@ int setfl(int fd, struct file * filp, unsigned long arg)
out:
return error;
}
-EXPORT_SYMBOL_GPL(setfl);
static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
int force)
diff --git a/fs/file.c b/fs/file.c
index 39f8f1592..1fbc5c055 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -25,9 +25,9 @@
int sysctl_nr_open __read_mostly = 1024*1024;
int sysctl_nr_open_min = BITS_PER_LONG;
-/* our max() is unusable in constant expressions ;-/ */
-#define __const_max(x, y) ((x) < (y) ? (x) : (y))
-int sysctl_nr_open_max = __const_max(INT_MAX, ~(size_t)0/sizeof(void *)) &
+/* our min() is unusable in constant expressions ;-/ */
+#define __const_min(x, y) ((x) < (y) ? (x) : (y))
+int sysctl_nr_open_max = __const_min(INT_MAX, ~(size_t)0/sizeof(void *)) &
-BITS_PER_LONG;
static void *alloc_fdmem(size_t size)
@@ -37,11 +37,12 @@ static void *alloc_fdmem(size_t size)
* vmalloc() if the allocation size will be considered "large" by the VM.
*/
if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
- void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY);
+ void *data = kmalloc(size, GFP_KERNEL_ACCOUNT |
+ __GFP_NOWARN | __GFP_NORETRY);
if (data != NULL)
return data;
}
- return vmalloc(size);
+ return __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_HIGHMEM, PAGE_KERNEL);
}
static void __free_fdtable(struct fdtable *fdt)
@@ -126,7 +127,7 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
if (unlikely(nr > sysctl_nr_open))
nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
- fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
+ fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL_ACCOUNT);
if (!fdt)
goto out;
fdt->max_fds = nr;
diff --git a/fs/file_table.c b/fs/file_table.c
index ae9f2676d..ad17e05eb 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -147,7 +147,6 @@ over:
}
return ERR_PTR(-ENFILE);
}
-EXPORT_SYMBOL_GPL(get_empty_filp);
/**
* alloc_file - allocate and initialize a 'struct file'
@@ -259,7 +258,6 @@ void flush_delayed_fput(void)
{
delayed_fput(NULL);
}
-EXPORT_SYMBOL_GPL(flush_delayed_fput);
static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
@@ -302,7 +300,6 @@ void __fput_sync(struct file *file)
}
EXPORT_SYMBOL(fput);
-EXPORT_SYMBOL_GPL(__fput_sync);
void put_filp(struct file *file)
{
@@ -311,7 +308,6 @@ void put_filp(struct file *file)
file_free(file);
}
}
-EXPORT_SYMBOL_GPL(put_filp);
void __init files_init(void)
{
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 5797d45a7..c5618db11 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -46,9 +46,9 @@ void put_filesystem(struct file_system_type *fs)
static struct file_system_type **find_filesystem(const char *name, unsigned len)
{
struct file_system_type **p;
- for (p=&file_systems; *p; p=&(*p)->next)
- if (strlen((*p)->name) == len &&
- strncmp((*p)->name, name, len) == 0)
+ for (p = &file_systems; *p; p = &(*p)->next)
+ if (strncmp((*p)->name, name, len) == 0 &&
+ !(*p)->name[len])
break;
return p;
}
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index ef73ed674..3e2ccade6 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -326,6 +326,7 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
} else if (S_ISLNK(ip->i_mode)) {
if (!VXFS_ISIMMED(vip)) {
ip->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(ip);
ip->i_mapping->a_ops = &vxfs_aops;
} else {
ip->i_op = &simple_symlink_inode_operations;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 7a8ea1351..5c46ed9f3 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -685,9 +685,7 @@ void wbc_account_io(struct writeback_control *wbc, struct page *page,
if (!wbc->wb)
return;
- rcu_read_lock();
id = mem_cgroup_css_from_page(page)->id;
- rcu_read_unlock();
if (id == wbc->wb_id) {
wbc->wb_bytes += bytes;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 5e2e08712..4b855b65d 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -944,7 +944,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
if (!parent)
return -ENOENT;
- mutex_lock(&parent->i_mutex);
+ inode_lock(parent);
if (!S_ISDIR(parent->i_mode))
goto unlock;
@@ -962,7 +962,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
fuse_invalidate_entry(entry);
if (child_nodeid != 0 && d_really_is_positive(entry)) {
- mutex_lock(&d_inode(entry)->i_mutex);
+ inode_lock(d_inode(entry));
if (get_node_id(d_inode(entry)) != child_nodeid) {
err = -ENOENT;
goto badentry;
@@ -983,7 +983,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
clear_nlink(d_inode(entry));
err = 0;
badentry:
- mutex_unlock(&d_inode(entry)->i_mutex);
+ inode_unlock(d_inode(entry));
if (!err)
d_delete(entry);
} else {
@@ -992,7 +992,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
dput(entry);
unlock:
- mutex_unlock(&parent->i_mutex);
+ inode_unlock(parent);
iput(parent);
return err;
}
@@ -1365,15 +1365,19 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx)
return err;
}
-static const char *fuse_follow_link(struct dentry *dentry, void **cookie)
+static const char *fuse_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct inode *inode = d_inode(dentry);
struct fuse_conn *fc = get_fuse_conn(inode);
FUSE_ARGS(args);
char *link;
ssize_t ret;
- link = (char *) __get_free_page(GFP_KERNEL);
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
+ link = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!link)
return ERR_PTR(-ENOMEM);
@@ -1385,11 +1389,11 @@ static const char *fuse_follow_link(struct dentry *dentry, void **cookie)
args.out.args[0].value = link;
ret = fuse_simple_request(fc, &args);
if (ret < 0) {
- free_page((unsigned long) link);
+ kfree(link);
link = ERR_PTR(ret);
} else {
link[ret] = '\0';
- *cookie = link;
+ set_delayed_call(done, kfree_link, link);
}
fuse_invalidate_atime(inode);
return link;
@@ -1500,7 +1504,7 @@ void fuse_set_nowrite(struct inode *inode)
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
- BUG_ON(!mutex_is_locked(&inode->i_mutex));
+ BUG_ON(!inode_is_locked(inode));
spin_lock(&fc->lock);
BUG_ON(fi->writectr < 0);
@@ -1909,8 +1913,7 @@ static const struct inode_operations fuse_common_inode_operations = {
static const struct inode_operations fuse_symlink_inode_operations = {
.setattr = fuse_setattr,
- .follow_link = fuse_follow_link,
- .put_link = free_page_put_link,
+ .get_link = fuse_get_link,
.readlink = generic_readlink,
.getattr = fuse_getattr,
.setxattr = fuse_setxattr,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 570ca4053..b03d253ec 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -207,7 +207,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
return err;
if (lock_inode)
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = fuse_do_open(fc, get_node_id(inode), file, isdir);
@@ -215,7 +215,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
fuse_finish_open(inode, file);
if (lock_inode)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
@@ -413,9 +413,9 @@ static int fuse_flush(struct file *file, fl_owner_t id)
if (err)
return err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
fuse_sync_writes(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
req = fuse_get_req_nofail_nopages(fc, file);
memset(&inarg, 0, sizeof(inarg));
@@ -450,7 +450,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
if (is_bad_inode(inode))
return -EIO;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* Start writeback against all dirty pages of the inode, then
@@ -486,7 +486,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
err = 0;
}
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
@@ -1160,7 +1160,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
return generic_file_write_iter(iocb, from);
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* We can write back this queue in page reclaim */
current->backing_dev_info = inode_to_bdi(inode);
@@ -1210,7 +1210,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
}
out:
current->backing_dev_info = NULL;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return written ? written : err;
}
@@ -1322,10 +1322,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
if (!write)
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
fuse_sync_writes(inode);
if (!write)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
while (count) {
@@ -1413,14 +1413,14 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
return -EIO;
/* Don't allow parallel writes to the same file */
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
res = generic_write_checks(iocb, from);
if (res > 0)
res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE);
fuse_invalidate_attr(inode);
if (res > 0)
fuse_write_update_size(inode, iocb->ki_pos);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return res;
}
@@ -2231,20 +2231,77 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
return err ? 0 : outarg.block;
}
+static loff_t fuse_lseek(struct file *file, loff_t offset, int whence)
+{ /* Forward a seek to the FUSE server via FUSE_LSEEK; falls back to generic_file_llseek() if the server lacks it. */
+ struct inode *inode = file->f_mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = file->private_data;
+ FUSE_ARGS(args);
+ struct fuse_lseek_in inarg = {
+ .fh = ff->fh,
+ .offset = offset,
+ .whence = whence
+ };
+ struct fuse_lseek_out outarg;
+ int err;
+
+ if (fc->no_lseek) /* set below once FUSE_LSEEK has returned -ENOSYS */
+ goto fallback;
+
+ args.in.h.opcode = FUSE_LSEEK;
+ args.in.h.nodeid = ff->nodeid;
+ args.in.numargs = 1; /* single input argument: inarg */
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.out.numargs = 1; /* single output argument: outarg */
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ err = fuse_simple_request(fc, &args);
+ if (err) {
+ if (err == -ENOSYS) { /* remember on the connection so later calls skip the request */
+ fc->no_lseek = 1;
+ goto fallback;
+ }
+ return err; /* any other error is returned to the caller unchanged */
+ }
+
+ return vfs_setpos(file, outarg.offset, inode->i_sb->s_maxbytes); /* adopt the offset the server returned */
+
+fallback: /* refresh attributes first, then let the generic helper do the seek */
+ err = fuse_update_attributes(inode, NULL, file, NULL);
+ if (!err)
+ return generic_file_llseek(file, offset, whence);
+ else
+ return err;
+}
+
static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence)
{
loff_t retval;
struct inode *inode = file_inode(file);
- /* No i_mutex protection necessary for SEEK_CUR and SEEK_SET */
- if (whence == SEEK_CUR || whence == SEEK_SET)
- return generic_file_llseek(file, offset, whence);
-
- mutex_lock(&inode->i_mutex);
- retval = fuse_update_attributes(inode, NULL, file, NULL);
- if (!retval)
+ switch (whence) {
+ case SEEK_SET:
+ case SEEK_CUR:
+ /* No i_mutex protection necessary for SEEK_CUR and SEEK_SET */
retval = generic_file_llseek(file, offset, whence);
- mutex_unlock(&inode->i_mutex);
+ break;
+ case SEEK_END:
+ inode_lock(inode);
+ retval = fuse_update_attributes(inode, NULL, file, NULL);
+ if (!retval)
+ retval = generic_file_llseek(file, offset, whence);
+ inode_unlock(inode);
+ break;
+ case SEEK_HOLE:
+ case SEEK_DATA:
+ inode_lock(inode);
+ retval = fuse_lseek(file, offset, whence);
+ inode_unlock(inode);
+ break;
+ default:
+ retval = -EINVAL;
+ }
return retval;
}
@@ -2887,7 +2944,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
return -EOPNOTSUPP;
if (lock_inode) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (mode & FALLOC_FL_PUNCH_HOLE) {
loff_t endbyte = offset + length - 1;
err = filemap_write_and_wait_range(inode->i_mapping,
@@ -2933,7 +2990,7 @@ out:
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
if (lock_inode)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 405113101..ce394b5fe 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -605,6 +605,9 @@ struct fuse_conn {
/** Does the filesystem support asynchronous direct-IO submission? */
unsigned async_dio:1;
+ /** Is lseek not implemented by fs? */
+ unsigned no_lseek:1;
+
/** The number of requests waiting for completion */
atomic_t num_waiting;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 2913db2a5..4d69d5c0b 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1255,8 +1255,8 @@ static int __init fuse_fs_init(void)
int err;
fuse_inode_cachep = kmem_cache_create("fuse_inode",
- sizeof(struct fuse_inode),
- 0, SLAB_HWCACHE_ALIGN,
+ sizeof(struct fuse_inode), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT,
fuse_inode_init_once);
err = -ENOMEM;
if (!fuse_inode_cachep)
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 1be3b061c..791932617 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -31,9 +31,9 @@ static const char *gfs2_acl_name(int type)
{
switch (type) {
case ACL_TYPE_ACCESS:
- return GFS2_POSIX_ACL_ACCESS;
+ return XATTR_POSIX_ACL_ACCESS;
case ACL_TYPE_DEFAULT:
- return GFS2_POSIX_ACL_DEFAULT;
+ return XATTR_POSIX_ACL_DEFAULT;
}
return NULL;
}
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index 2d65ec4cd..3af4f407a 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -12,8 +12,6 @@
#include "incore.h"
-#define GFS2_POSIX_ACL_ACCESS "posix_acl_access"
-#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default"
#define GFS2_ACL_MAX_ENTRIES(sdp) ((300 << (sdp)->sd_sb.sb_bsize_shift) >> 12)
extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type);
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 1caee0534..93f07465e 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -914,7 +914,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
failed:
gfs2_trans_end(sdp);
gfs2_inplace_release(ip);
- if (ip->i_res->rs_qa_qd_num)
+ if (ip->i_qadata && ip->i_qadata->qa_qd_num)
gfs2_quota_unlock(ip);
if (inode == sdp->sd_rindex) {
gfs2_glock_dq(&m_ip->i_gh);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 61296ecbd..0860f0b5b 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -787,8 +787,8 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
if (error)
goto out_rlist;
- if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */
- gfs2_rs_deltree(ip->i_res);
+ if (gfs2_rs_active(&ip->i_res)) /* needs to be done with the rgrp glock held */
+ gfs2_rs_deltree(&ip->i_res);
error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
RES_INDIRECT + RES_STATFS + RES_QUOTA,
@@ -1291,13 +1291,9 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
if (ret)
return ret;
- ret = get_write_access(inode);
- if (ret)
- return ret;
-
inode_dio_wait(inode);
- ret = gfs2_rs_alloc(ip);
+ ret = gfs2_rsqa_alloc(ip);
if (ret)
goto out;
@@ -1307,10 +1303,9 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
goto out;
}
- gfs2_rs_deltree(ip->i_res);
ret = do_shrink(inode, oldsize, newsize);
out:
- put_write_access(inode);
+ gfs2_rsqa_delete(ip, NULL);
return ret;
}
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index ad8a5b757..6a9259230 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -82,6 +82,8 @@
#define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1)
#define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1))
+#define GFS2_HASH_INDEX_MASK 0xffffc000
+#define GFS2_USE_HASH_FLAG 0x2000
struct qstr gfs2_qdot __read_mostly;
struct qstr gfs2_qdotdot __read_mostly;
@@ -108,7 +110,7 @@ static int gfs2_dir_get_existing_buffer(struct gfs2_inode *ip, u64 block,
struct buffer_head *bh;
int error;
- error = gfs2_meta_read(ip->i_gl, block, DIO_WAIT, &bh);
+ error = gfs2_meta_read(ip->i_gl, block, DIO_WAIT, 0, &bh);
if (error)
return error;
if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_JD)) {
@@ -305,7 +307,7 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, __be64 *buf,
BUG_ON(extlen < 1);
bh = gfs2_meta_ra(ip->i_gl, dblock, extlen);
} else {
- error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, &bh);
+ error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, 0, &bh);
if (error)
goto fail;
}
@@ -443,6 +445,27 @@ static int gfs2_dirent_last(const struct gfs2_dirent *dent,
return 0;
}
+/* Look for the dirent whose record contains the address passed in @ptr. Once
+ * we find that dirent, there must be space available there for the new dirent.
+ *
+ * Returns 0 when @ptr lies outside [dent, dent + de_rec_len); 1 when @ptr
+ * lies past the space used by @dent itself and a dirent for @name
+ * (GFS2_DIRENT_SIZE(name->len) bytes) fits between @ptr and the end of the
+ * record; -1 when @ptr falls inside the used part of @dent or the remaining
+ * room is too small. A sentinel dirent counts as using no space. */
+static int gfs2_dirent_find_offset(const struct gfs2_dirent *dent,
+ const struct qstr *name,
+ void *ptr)
+{
+ unsigned required = GFS2_DIRENT_SIZE(name->len); /* bytes needed by the new dirent */
+ unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); /* bytes used by dent itself */
+ unsigned totlen = be16_to_cpu(dent->de_rec_len); /* full record length, used + free */
+
+ if (ptr < (void *)dent || ptr >= (void *)dent + totlen)
+ return 0; /* ptr is not inside this record */
+ if (gfs2_dirent_sentinel(dent))
+ actual = 0; /* a sentinel occupies no usable space */
+ if (ptr < (void *)dent + actual)
+ return -1; /* ptr would overlap the existing entry */
+ if ((void *)dent + totlen >= ptr + required)
+ return 1; /* enough room from ptr to the end of the record */
+ return -1; /* record found, but too little room after ptr */
+}
+
static int gfs2_dirent_find_space(const struct gfs2_dirent *dent,
const struct qstr *name,
void *opaque)
@@ -682,6 +705,27 @@ static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
prev->de_rec_len = cpu_to_be16(prev_rec_len);
}
+
+static struct gfs2_dirent *do_init_dirent(struct inode *inode,
+ struct gfs2_dirent *dent,
+ const struct qstr *name,
+ struct buffer_head *bh,
+ unsigned offset)
+{ /* Split dent's record at byte @offset and initialise a new dirent for @name in the tail; returns the new dirent. */
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_dirent *ndent;
+ unsigned totlen;
+
+ totlen = be16_to_cpu(dent->de_rec_len); /* original record length before the split */
+ BUG_ON(offset + name->len > totlen); /* the name must fit in what remains after offset */
+ gfs2_trans_add_meta(ip->i_gl, bh); /* called before bh's contents are modified below */
+ ndent = (struct gfs2_dirent *)((char *)dent + offset);
+ dent->de_rec_len = cpu_to_be16(offset); /* dent now ends where the new dirent begins */
+ gfs2_qstr2dirent(name, totlen - offset, ndent); /* new dirent is given the remaining totlen - offset bytes */
+ return ndent;
+}
+
+
/*
* Takes a dent from which to grab space as an argument. Returns the
* newly created dent.
@@ -691,31 +735,25 @@ static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode,
const struct qstr *name,
struct buffer_head *bh)
{
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_dirent *ndent;
- unsigned offset = 0, totlen;
+ unsigned offset = 0;
if (!gfs2_dirent_sentinel(dent))
offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
- totlen = be16_to_cpu(dent->de_rec_len);
- BUG_ON(offset + name->len > totlen);
- gfs2_trans_add_meta(ip->i_gl, bh);
- ndent = (struct gfs2_dirent *)((char *)dent + offset);
- dent->de_rec_len = cpu_to_be16(offset);
- gfs2_qstr2dirent(name, totlen - offset, ndent);
- return ndent;
+ return do_init_dirent(inode, dent, name, bh, offset);
}
-static struct gfs2_dirent *gfs2_dirent_alloc(struct inode *inode,
- struct buffer_head *bh,
- const struct qstr *name)
+static struct gfs2_dirent *gfs2_dirent_split_alloc(struct inode *inode,
+ struct buffer_head *bh,
+ const struct qstr *name,
+ void *ptr)
{
struct gfs2_dirent *dent;
dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
- gfs2_dirent_find_space, name, NULL);
+ gfs2_dirent_find_offset, name, ptr);
if (!dent || IS_ERR(dent))
return dent;
- return gfs2_init_dirent(inode, dent, name, bh);
+ return do_init_dirent(inode, dent, name, bh,
+ (unsigned)(ptr - (void *)dent));
}
static int get_leaf(struct gfs2_inode *dip, u64 leaf_no,
@@ -723,7 +761,7 @@ static int get_leaf(struct gfs2_inode *dip, u64 leaf_no,
{
int error;
- error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_WAIT, bhp);
+ error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_WAIT, 0, bhp);
if (!error && gfs2_metatype_check(GFS2_SB(&dip->i_inode), *bhp, GFS2_METATYPE_LF)) {
/* pr_info("block num=%llu\n", leaf_no); */
error = -EIO;
@@ -1051,10 +1089,11 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
if (!gfs2_dirent_sentinel(dent) &&
be32_to_cpu(dent->de_hash) < divider) {
struct qstr str;
+ void *ptr = ((char *)dent - obh->b_data) + nbh->b_data;
str.name = (char*)(dent+1);
str.len = be16_to_cpu(dent->de_name_len);
str.hash = be32_to_cpu(dent->de_hash);
- new = gfs2_dirent_alloc(inode, nbh, &str);
+ new = gfs2_dirent_split_alloc(inode, nbh, &str, ptr);
if (IS_ERR(new)) {
error = PTR_ERR(new);
break;
@@ -1186,10 +1225,10 @@ static int compare_dents(const void *a, const void *b)
int ret = 0;
dent_a = *(const struct gfs2_dirent **)a;
- hash_a = be32_to_cpu(dent_a->de_hash);
+ hash_a = dent_a->de_cookie;
dent_b = *(const struct gfs2_dirent **)b;
- hash_b = be32_to_cpu(dent_b->de_hash);
+ hash_b = dent_b->de_cookie;
if (hash_a > hash_b)
ret = 1;
@@ -1227,19 +1266,20 @@ static int compare_dents(const void *a, const void *b)
*/
static int do_filldir_main(struct gfs2_inode *dip, struct dir_context *ctx,
- const struct gfs2_dirent **darr, u32 entries,
- int *copied)
+ struct gfs2_dirent **darr, u32 entries,
+ u32 sort_start, int *copied)
{
const struct gfs2_dirent *dent, *dent_next;
u64 off, off_next;
unsigned int x, y;
int run = 0;
- sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL);
+ if (sort_start < entries)
+ sort(&darr[sort_start], entries - sort_start,
+ sizeof(struct gfs2_dirent *), compare_dents, NULL);
dent_next = darr[0];
- off_next = be32_to_cpu(dent_next->de_hash);
- off_next = gfs2_disk_hash2offset(off_next);
+ off_next = dent_next->de_cookie;
for (x = 0, y = 1; x < entries; x++, y++) {
dent = dent_next;
@@ -1247,8 +1287,7 @@ static int do_filldir_main(struct gfs2_inode *dip, struct dir_context *ctx,
if (y < entries) {
dent_next = darr[y];
- off_next = be32_to_cpu(dent_next->de_hash);
- off_next = gfs2_disk_hash2offset(off_next);
+ off_next = dent_next->de_cookie;
if (off < ctx->pos)
continue;
@@ -1295,6 +1334,40 @@ static void *gfs2_alloc_sort_buffer(unsigned size)
return ptr;
}
+
+static int gfs2_set_cookies(struct gfs2_sbd *sdp, struct buffer_head *bh,
+ unsigned leaf_nr, struct gfs2_dirent **darr,
+ unsigned entries)
+{ /* Fill in de_cookie for each of the @entries dirents in @darr; returns the index of the first hash-only cookie, or -1 if none. */
+ int sort_id = -1; /* -1 until an entry has to fall back to GFS2_USE_HASH_FLAG */
+ int i;
+
+ for (i = 0; i < entries; i++) {
+ unsigned offset;
+
+ darr[i]->de_cookie = be32_to_cpu(darr[i]->de_hash);
+ darr[i]->de_cookie = gfs2_disk_hash2offset(darr[i]->de_cookie); /* base cookie: the hash shifted right by one */
+
+ if (!sdp->sd_args.ar_loccookie) /* without ar_loccookie the cookie stays purely hash-based */
+ continue;
+ offset = (char *)(darr[i]) -
+ (bh->b_data + gfs2_dirent_offset(bh->b_data)); /* byte distance from the first dirent in bh */
+ offset /= GFS2_MIN_DIRENT_SIZE; /* convert bytes to a slot number */
+ offset += leaf_nr * sdp->sd_max_dents_per_leaf; /* bias the slot by the leaf's position */
+ if (offset >= GFS2_USE_HASH_FLAG ||
+ leaf_nr >= GFS2_USE_HASH_FLAG) { /* location does not fit below the flag bit */
+ darr[i]->de_cookie |= GFS2_USE_HASH_FLAG;
+ if (sort_id < 0)
+ sort_id = i; /* remember only the first such entry */
+ continue;
+ }
+ darr[i]->de_cookie &= GFS2_HASH_INDEX_MASK; /* keep only the high hash bits ... */
+ darr[i]->de_cookie |= offset; /* ... and store the location in the low bits */
+ }
+ return sort_id;
+}
+
+
static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx,
int *copied, unsigned *depth,
u64 leaf_no)
@@ -1304,12 +1377,11 @@ static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx,
struct buffer_head *bh;
struct gfs2_leaf *lf;
unsigned entries = 0, entries2 = 0;
- unsigned leaves = 0;
- const struct gfs2_dirent **darr, *dent;
+ unsigned leaves = 0, leaf = 0, offset, sort_offset;
+ struct gfs2_dirent **darr, *dent;
struct dirent_gather g;
struct buffer_head **larr;
- int leaf = 0;
- int error, i;
+ int error, i, need_sort = 0, sort_id;
u64 lfn = leaf_no;
do {
@@ -1325,6 +1397,11 @@ static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx,
brelse(bh);
} while(lfn);
+ if (*depth < GFS2_DIR_MAX_DEPTH || !sdp->sd_args.ar_loccookie) {
+ need_sort = 1;
+ sort_offset = 0;
+ }
+
if (!entries)
return 0;
@@ -1338,8 +1415,8 @@ static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx,
larr = gfs2_alloc_sort_buffer((leaves + entries + 99) * sizeof(void *));
if (!larr)
goto out;
- darr = (const struct gfs2_dirent **)(larr + leaves);
- g.pdent = darr;
+ darr = (struct gfs2_dirent **)(larr + leaves);
+ g.pdent = (const struct gfs2_dirent **)darr;
g.offset = 0;
lfn = leaf_no;
@@ -1350,6 +1427,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx,
lf = (struct gfs2_leaf *)bh->b_data;
lfn = be64_to_cpu(lf->lf_next);
if (lf->lf_entries) {
+ offset = g.offset;
entries2 += be16_to_cpu(lf->lf_entries);
dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
gfs2_dirent_gather, NULL, &g);
@@ -1367,17 +1445,26 @@ static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx,
goto out_free;
}
error = 0;
+ sort_id = gfs2_set_cookies(sdp, bh, leaf, &darr[offset],
+ be16_to_cpu(lf->lf_entries));
+ if (!need_sort && sort_id >= 0) {
+ need_sort = 1;
+ sort_offset = offset + sort_id;
+ }
larr[leaf++] = bh;
} else {
+ larr[leaf++] = NULL;
brelse(bh);
}
} while(lfn);
BUG_ON(entries2 != entries);
- error = do_filldir_main(ip, ctx, darr, entries, copied);
+ error = do_filldir_main(ip, ctx, darr, entries, need_sort ?
+ sort_offset : entries, copied);
out_free:
for(i = 0; i < leaf; i++)
- brelse(larr[i]);
+ if (larr[i])
+ brelse(larr[i]);
kvfree(larr);
out:
return error;
@@ -1483,7 +1570,7 @@ int gfs2_dir_read(struct inode *inode, struct dir_context *ctx,
struct gfs2_inode *dip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct dirent_gather g;
- const struct gfs2_dirent **darr, *dent;
+ struct gfs2_dirent **darr, *dent;
struct buffer_head *dibh;
int copied = 0;
int error;
@@ -1507,7 +1594,7 @@ int gfs2_dir_read(struct inode *inode, struct dir_context *ctx,
/* 96 is max number of dirents which can be stuffed into an inode */
darr = kmalloc(96 * sizeof(struct gfs2_dirent *), GFP_NOFS);
if (darr) {
- g.pdent = darr;
+ g.pdent = (const struct gfs2_dirent **)darr;
g.offset = 0;
dent = gfs2_dirent_scan(inode, dibh->b_data, dibh->b_size,
gfs2_dirent_gather, NULL, &g);
@@ -1524,8 +1611,9 @@ int gfs2_dir_read(struct inode *inode, struct dir_context *ctx,
error = -EIO;
goto out;
}
+ gfs2_set_cookies(sdp, dibh, 0, darr, dip->i_entries);
error = do_filldir_main(dip, ctx, darr,
- dip->i_entries, &copied);
+ dip->i_entries, 0, &copied);
out:
kfree(darr);
}
@@ -1560,15 +1648,22 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name,
dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
if (dent) {
+ struct inode *inode;
+ u16 rahead;
+
if (IS_ERR(dent))
return ERR_CAST(dent);
dtype = be16_to_cpu(dent->de_type);
+ rahead = be16_to_cpu(dent->de_rahead);
addr = be64_to_cpu(dent->de_inum.no_addr);
formal_ino = be64_to_cpu(dent->de_inum.no_formal_ino);
brelse(bh);
if (fail_on_exist)
return ERR_PTR(-EEXIST);
- return gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino, 0);
+ inode = gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino, 0);
+ if (!IS_ERR(inode))
+ GFS2_I(inode)->i_rahead = rahead;
+ return inode;
}
return ERR_PTR(-ENOENT);
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 5e425469f..c9384f932 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -298,9 +298,9 @@ static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
gfsflags &= ~GFS2_DIF_TOPDIR;
if (gfsflags & GFS2_DIF_INHERIT_JDATA)
gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA);
- return do_gfs2_set_flags(filp, gfsflags, ~0);
+ return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_SYSTEM);
}
- return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA);
+ return do_gfs2_set_flags(filp, gfsflags, ~(GFS2_DIF_SYSTEM | GFS2_DIF_JDATA));
}
static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
@@ -336,8 +336,8 @@ static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size)
size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift;
int hint = min_t(size_t, INT_MAX, blks);
- if (hint > atomic_read(&ip->i_res->rs_sizehint))
- atomic_set(&ip->i_res->rs_sizehint, hint);
+ if (hint > atomic_read(&ip->i_res.rs_sizehint))
+ atomic_set(&ip->i_res.rs_sizehint, hint);
}
/**
@@ -397,14 +397,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
/* Update file times before taking page lock */
file_update_time(vma->vm_file);
- ret = get_write_access(inode);
+ ret = gfs2_rsqa_alloc(ip);
if (ret)
goto out;
- ret = gfs2_rs_alloc(ip);
- if (ret)
- goto out_write_access;
-
gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
@@ -486,8 +482,6 @@ out_uninit:
set_page_dirty(page);
wait_for_stable_page(page);
}
-out_write_access:
- put_write_access(inode);
out:
sb_end_pagefault(inode->i_sb);
return block_page_mkwrite_return(ret);
@@ -623,7 +617,7 @@ static int gfs2_release(struct inode *inode, struct file *file)
if (!(file->f_mode & FMODE_WRITE))
return 0;
- gfs2_rs_delete(ip, &inode->i_writecount);
+ gfs2_rsqa_delete(ip, &inode->i_writecount);
return 0;
}
@@ -703,7 +697,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct gfs2_inode *ip = GFS2_I(file_inode(file));
int ret;
- ret = gfs2_rs_alloc(ip);
+ ret = gfs2_rsqa_alloc(ip);
if (ret)
return ret;
@@ -920,7 +914,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le
if ((mode & ~FALLOC_FL_KEEP_SIZE) || gfs2_is_jdata(ip))
return -EOPNOTSUPP;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
ret = gfs2_glock_nq(&gh);
@@ -938,20 +932,21 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le
if (ret)
goto out_unlock;
- ret = gfs2_rs_alloc(ip);
+ ret = gfs2_rsqa_alloc(ip);
if (ret)
goto out_putw;
ret = __gfs2_fallocate(file, mode, offset, len);
if (ret)
- gfs2_rs_deltree(ip->i_res);
+ gfs2_rs_deltree(&ip->i_res);
+
out_putw:
put_write_access(inode);
out_unlock:
gfs2_glock_dq(&gh);
out_uninit:
gfs2_holder_uninit(&gh);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
@@ -962,7 +957,7 @@ static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
int error;
struct gfs2_inode *ip = GFS2_I(out->f_mapping->host);
- error = gfs2_rs_alloc(ip);
+ error = gfs2_rsqa_alloc(ip);
if (error)
return (ssize_t)error;
@@ -1018,7 +1013,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
struct gfs2_inode *ip = GFS2_I(file_inode(file));
struct gfs2_glock *gl;
unsigned int state;
- int flags;
+ u16 flags;
int error = 0;
int sleeptime;
@@ -1032,7 +1027,10 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
if (fl_gh->gh_state == state)
goto out;
locks_lock_file_wait(file,
- &(struct file_lock){.fl_type = F_UNLCK});
+ &(struct file_lock) {
+ .fl_type = F_UNLCK,
+ .fl_flags = FL_FLOCK
+ });
gfs2_glock_dq(fl_gh);
gfs2_holder_reinit(state, flags, fl_gh);
} else {
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 32e74710b..a4ff7b56f 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -446,7 +446,7 @@ __acquires(&gl->gl_lockref.lock)
{
const struct gfs2_glock_operations *glops = gl->gl_ops;
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- unsigned int lck_flags = gh ? gh->gh_flags : 0;
+ unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0);
int ret;
lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
@@ -750,7 +750,7 @@ again:
*
*/
-void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
+void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags,
struct gfs2_holder *gh)
{
INIT_LIST_HEAD(&gh->gh_list);
@@ -774,7 +774,7 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
*
*/
-void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *gh)
+void gfs2_holder_reinit(unsigned int state, u16 flags, struct gfs2_holder *gh)
{
gh->gh_state = state;
gh->gh_flags = flags;
@@ -1080,7 +1080,7 @@ void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
const struct gfs2_glock_operations *glops,
- unsigned int state, int flags, struct gfs2_holder *gh)
+ unsigned int state, u16 flags, struct gfs2_holder *gh)
{
struct gfs2_glock *gl;
int error;
@@ -1417,14 +1417,14 @@ static struct shrinker glock_shrinker = {
static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
{
struct gfs2_glock *gl;
- struct rhash_head *pos, *next;
+ struct rhash_head *pos;
const struct bucket_table *tbl;
int i;
rcu_read_lock();
tbl = rht_dereference_rcu(gl_hash_table.tbl, &gl_hash_table);
for (i = 0; i < tbl->size; i++) {
- rht_for_each_entry_safe(gl, pos, next, tbl, i, gl_node) {
+ rht_for_each_entry_rcu(gl, pos, tbl, i, gl_node) {
if ((gl->gl_name.ln_sbd == sdp) &&
lockref_get_not_dead(&gl->gl_lockref))
examiner(gl);
@@ -1506,7 +1506,9 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
flush_workqueue(glock_workqueue);
glock_hash_walk(clear_glock, sdp);
flush_workqueue(glock_workqueue);
- wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
+ wait_event_timeout(sdp->sd_glock_wait,
+ atomic_read(&sdp->sd_glock_disposal) == 0,
+ HZ * 600);
glock_hash_walk(dump_glock_func, sdp);
}
@@ -1539,7 +1541,7 @@ static const char *state2str(unsigned state)
return "??";
}
-static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags)
+static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
{
char *p = buf;
if (flags & LM_FLAG_TRY)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index f7cdaa8b4..46ab67fc1 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -79,15 +79,15 @@ enum {
* requested had acquired and released the lock.
*/
-#define LM_FLAG_TRY 0x00000001
-#define LM_FLAG_TRY_1CB 0x00000002
-#define LM_FLAG_NOEXP 0x00000004
-#define LM_FLAG_ANY 0x00000008
-#define LM_FLAG_PRIORITY 0x00000010
-#define GL_ASYNC 0x00000040
-#define GL_EXACT 0x00000080
-#define GL_SKIP 0x00000100
-#define GL_NOCACHE 0x00000400
+#define LM_FLAG_TRY 0x0001
+#define LM_FLAG_TRY_1CB 0x0002
+#define LM_FLAG_NOEXP 0x0004
+#define LM_FLAG_ANY 0x0008
+#define LM_FLAG_PRIORITY 0x0010
+#define GL_ASYNC 0x0040
+#define GL_EXACT 0x0080
+#define GL_SKIP 0x0100
+#define GL_NOCACHE 0x0400
/*
* lm_async_cb return flags
@@ -183,8 +183,8 @@ extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
int create, struct gfs2_glock **glp);
extern void gfs2_glock_put(struct gfs2_glock *gl);
extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state,
- unsigned flags, struct gfs2_holder *gh);
-extern void gfs2_holder_reinit(unsigned int state, unsigned flags,
+ u16 flags, struct gfs2_holder *gh);
+extern void gfs2_holder_reinit(unsigned int state, u16 flags,
struct gfs2_holder *gh);
extern void gfs2_holder_uninit(struct gfs2_holder *gh);
extern int gfs2_glock_nq(struct gfs2_holder *gh);
@@ -195,7 +195,7 @@ extern void gfs2_glock_dq_wait(struct gfs2_holder *gh);
extern void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
const struct gfs2_glock_operations *glops,
- unsigned int state, int flags,
+ unsigned int state, u16 flags,
struct gfs2_holder *gh);
extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
@@ -215,7 +215,7 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
*/
static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
- unsigned int state, int flags,
+ unsigned int state, u16 flags,
struct gfs2_holder *gh)
{
int error;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index f348cfb6b..437fd73e3 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -13,6 +13,7 @@
#include <linux/gfs2_ondisk.h>
#include <linux/bio.h>
#include <linux/posix_acl.h>
+#include <linux/security.h>
#include "gfs2.h"
#include "incore.h"
@@ -262,6 +263,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
if (ip) {
set_bit(GIF_INVALID, &ip->i_flags);
forget_all_cached_acls(&ip->i_inode);
+ security_inode_invalidate_secctx(&ip->i_inode);
gfs2_dir_hash_inval(ip);
}
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index de7b4f97a..845fb09cc 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -259,8 +259,8 @@ struct gfs2_holder {
struct gfs2_glock *gh_gl;
struct pid *gh_owner_pid;
- unsigned int gh_state;
- unsigned gh_flags;
+ u16 gh_flags;
+ u16 gh_state;
int gh_error;
unsigned long gh_iflags; /* HIF_... */
@@ -270,6 +270,13 @@ struct gfs2_holder {
/* Number of quota types we support */
#define GFS2_MAXQUOTAS 2
+struct gfs2_qadata { /* quota allocation data */
+ /* Quota stuff */
+ struct gfs2_quota_data *qa_qd[2 * GFS2_MAXQUOTAS];
+ struct gfs2_holder qa_qd_ghs[2 * GFS2_MAXQUOTAS];
+ unsigned int qa_qd_num;
+};
+
/* Resource group multi-block reservation, in order of appearance:
Step 1. Function prepares to write, allocates a mb, sets the size hint.
@@ -288,11 +295,6 @@ struct gfs2_blkreserv {
struct gfs2_rbm rs_rbm; /* Start of reservation */
u32 rs_free; /* how many blocks are still free */
u64 rs_inum; /* Inode number for reservation */
-
- /* ancillary quota stuff */
- struct gfs2_quota_data *rs_qa_qd[2 * GFS2_MAXQUOTAS];
- struct gfs2_holder rs_qa_qd_ghs[2 * GFS2_MAXQUOTAS];
- unsigned int rs_qa_qd_num;
};
/*
@@ -391,7 +393,8 @@ struct gfs2_inode {
struct gfs2_glock *i_gl; /* Move into i_gh? */
struct gfs2_holder i_iopen_gh;
struct gfs2_holder i_gh; /* for prepare/commit_write only */
- struct gfs2_blkreserv *i_res; /* rgrp multi-block reservation */
+ struct gfs2_qadata *i_qadata; /* quota allocation data */
+ struct gfs2_blkreserv i_res; /* rgrp multi-block reservation */
struct gfs2_rgrpd *i_rgd;
u64 i_goal; /* goal block for allocations */
struct rw_semaphore i_rw_mutex;
@@ -402,6 +405,7 @@ struct gfs2_inode {
u32 i_diskflags;
u8 i_height;
u8 i_depth;
+ u16 i_rahead;
};
/*
@@ -558,6 +562,8 @@ struct gfs2_args {
unsigned int ar_errors:2; /* errors=withdraw | panic */
unsigned int ar_nobarrier:1; /* do not send barriers */
unsigned int ar_rgrplvb:1; /* use lvbs for rgrp info */
+ unsigned int ar_loccookie:1; /* use location based readdir
+ cookies */
int ar_commit; /* Commit interval */
int ar_statfs_quantum; /* The fast statfs interval */
int ar_quota_quantum; /* The quota interval */
@@ -685,6 +691,7 @@ struct gfs2_sbd {
u64 sd_heightsize[GFS2_MAX_META_HEIGHT + 1];
u32 sd_max_jheight; /* Max height of journaled file's meta tree */
u64 sd_jheightsize[GFS2_MAX_META_HEIGHT + 1];
+ u32 sd_max_dents_per_leaf; /* Max number of dirents in a leaf block */
struct gfs2_args sd_args; /* Mount arguments */
struct gfs2_tune sd_tune; /* Filesystem tuning structure */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 063fdfcf8..352f95876 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -191,13 +191,13 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
fail_refresh:
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
ip->i_iopen_gh.gh_gl->gl_object = NULL;
- gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+ gfs2_glock_dq_wait(&ip->i_iopen_gh);
+ gfs2_holder_uninit(&ip->i_iopen_gh);
fail_iopen:
if (io_gl)
gfs2_glock_put(io_gl);
fail_put:
ip->i_gl->gl_object = NULL;
- gfs2_glock_put(ip->i_gl);
fail:
iget_failed(inode);
return ERR_PTR(error);
@@ -593,7 +593,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
struct gfs2_inode *dip = GFS2_I(dir), *ip;
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_glock *io_gl;
- int error, free_vfs_inode = 0;
+ int error, free_vfs_inode = 1;
u32 aflags = 0;
unsigned blocks = 1;
struct gfs2_diradd da = { .bh = NULL, .save_loc = 1, };
@@ -601,7 +601,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (!name->len || name->len > GFS2_FNAMESIZE)
return -ENAMETOOLONG;
- error = gfs2_rs_alloc(dip);
+ error = gfs2_rsqa_alloc(dip);
if (error)
return error;
@@ -650,10 +650,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
error = posix_acl_create(dir, &mode, &default_acl, &acl);
if (error)
- goto fail_free_vfs_inode;
+ goto fail_gunlock;
ip = GFS2_I(inode);
- error = gfs2_rs_alloc(ip);
+ error = gfs2_rsqa_alloc(ip);
if (error)
goto fail_free_acls;
@@ -685,6 +685,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
ip->i_entries = 2;
break;
}
+
+ /* Force SYSTEM flag on all files and subdirs of a SYSTEM directory */
+ if (dip->i_diskflags & GFS2_DIF_SYSTEM)
+ ip->i_diskflags |= GFS2_DIF_SYSTEM;
+
gfs2_set_inode_flags(inode);
if ((GFS2_I(d_inode(sdp->sd_root_dir)) == dip) ||
@@ -733,6 +738,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
gfs2_set_iop(inode);
insert_inode_hash(inode);
+ free_vfs_inode = 0; /* After this point, the inode is no longer
+ considered free. Any failures need to undo
+ the gfs2 structures. */
if (default_acl) {
error = gfs2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
posix_acl_release(default_acl);
@@ -766,24 +774,19 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
return error;
fail_gunlock3:
- gfs2_glock_dq_uninit(ghs + 1);
- if (ip->i_gl)
- gfs2_glock_put(ip->i_gl);
- goto fail_gunlock;
-
+ gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+ gfs2_glock_put(io_gl);
fail_gunlock2:
gfs2_glock_dq_uninit(ghs + 1);
fail_free_inode:
if (ip->i_gl)
gfs2_glock_put(ip->i_gl);
- gfs2_rs_delete(ip, NULL);
+ gfs2_rsqa_delete(ip, NULL);
fail_free_acls:
if (default_acl)
posix_acl_release(default_acl);
if (acl)
posix_acl_release(acl);
-fail_free_vfs_inode:
- free_vfs_inode = 1;
fail_gunlock:
gfs2_dir_no_add(&da);
gfs2_glock_dq_uninit(ghs);
@@ -898,7 +901,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
if (S_ISDIR(inode->i_mode))
return -EPERM;
- error = gfs2_rs_alloc(dip);
+ error = gfs2_rsqa_alloc(dip);
if (error)
return error;
@@ -1371,7 +1374,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
if (error)
return error;
- error = gfs2_rs_alloc(ndip);
+ error = gfs2_rsqa_alloc(ndip);
if (error)
return error;
@@ -1712,24 +1715,30 @@ static int gfs2_rename2(struct inode *odir, struct dentry *odentry,
}
/**
- * gfs2_follow_link - Follow a symbolic link
+ * gfs2_get_link - Follow a symbolic link
* @dentry: The dentry of the link
- * @nd: Data that we pass to vfs_follow_link()
+ * @inode: The inode of the link
+ * @done: destructor for return value
*
* This can handle symlinks of any size.
*
* Returns: 0 on success or error code
*/
-static const char *gfs2_follow_link(struct dentry *dentry, void **cookie)
+static const char *gfs2_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct gfs2_inode *ip = GFS2_I(d_inode(dentry));
+ struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder i_gh;
struct buffer_head *dibh;
unsigned int size;
char *buf;
int error;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
error = gfs2_glock_nq(&i_gh);
if (error) {
@@ -1759,7 +1768,7 @@ static const char *gfs2_follow_link(struct dentry *dentry, void **cookie)
out:
gfs2_glock_dq_uninit(&i_gh);
if (!IS_ERR(buf))
- *cookie = buf;
+ set_delayed_call(done, kfree_link, buf);
return buf;
}
@@ -1854,11 +1863,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid))
ogid = ngid = NO_GID_QUOTA_CHANGE;
- error = get_write_access(inode);
- if (error)
- return error;
-
- error = gfs2_rs_alloc(ip);
+ error = gfs2_rsqa_alloc(ip);
if (error)
goto out;
@@ -1898,7 +1903,6 @@ out_end_trans:
out_gunlock_q:
gfs2_quota_unlock(ip);
out:
- put_write_access(inode);
return error;
}
@@ -1920,7 +1924,7 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
struct gfs2_holder i_gh;
int error;
- error = gfs2_rs_alloc(ip);
+ error = gfs2_rsqa_alloc(ip);
if (error)
return error;
@@ -2002,7 +2006,7 @@ static int gfs2_setxattr(struct dentry *dentry, const char *name,
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
ret = gfs2_glock_nq(&gh);
if (ret == 0) {
- ret = gfs2_rs_alloc(ip);
+ ret = gfs2_rsqa_alloc(ip);
if (ret == 0)
ret = generic_setxattr(dentry, name, data, size, flags);
gfs2_glock_dq(&gh);
@@ -2043,7 +2047,7 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
ret = gfs2_glock_nq(&gh);
if (ret == 0) {
- ret = gfs2_rs_alloc(ip);
+ ret = gfs2_rsqa_alloc(ip);
if (ret == 0)
ret = generic_removexattr(dentry, name);
gfs2_glock_dq(&gh);
@@ -2063,7 +2067,7 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
if (ret)
@@ -2090,7 +2094,7 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
gfs2_glock_dq_uninit(&gh);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
@@ -2132,8 +2136,7 @@ const struct inode_operations gfs2_dir_iops = {
const struct inode_operations gfs2_symlink_iops = {
.readlink = generic_readlink,
- .follow_link = gfs2_follow_link,
- .put_link = kfree_put_link,
+ .get_link = gfs2_get_link,
.permission = gfs2_permission,
.setattr = gfs2_setattr,
.getattr = gfs2_getattr,
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 536e7a625..0ff028c15 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -716,6 +716,9 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
}
trace_gfs2_log_flush(sdp, 1);
+ if (type == SHUTDOWN_FLUSH)
+ clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+
sdp->sd_log_flush_head = sdp->sd_log_head;
sdp->sd_log_flush_wrapped = 0;
tr = sdp->sd_log_tr;
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index fb2b42cf4..f99f8e94d 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -41,7 +41,9 @@ static void gfs2_init_inode_once(void *foo)
inode_init_once(&ip->i_inode);
init_rwsem(&ip->i_rw_mutex);
INIT_LIST_HEAD(&ip->i_trunc_list);
- ip->i_res = NULL;
+ ip->i_qadata = NULL;
+ memset(&ip->i_res, 0, sizeof(ip->i_res));
+ RB_CLEAR_NODE(&ip->i_res.rs_node);
ip->i_hash_cache = NULL;
}
@@ -112,7 +114,8 @@ static int __init init_gfs2_fs(void)
gfs2_inode_cachep = kmem_cache_create("gfs2_inode",
sizeof(struct gfs2_inode),
0, SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD,
+ SLAB_MEM_SPREAD|
+ SLAB_ACCOUNT,
gfs2_init_inode_once);
if (!gfs2_inode_cachep)
goto fail;
@@ -135,10 +138,10 @@ static int __init init_gfs2_fs(void)
if (!gfs2_quotad_cachep)
goto fail;
- gfs2_rsrv_cachep = kmem_cache_create("gfs2_mblk",
- sizeof(struct gfs2_blkreserv),
+ gfs2_qadata_cachep = kmem_cache_create("gfs2_qadata",
+ sizeof(struct gfs2_qadata),
0, 0, NULL);
- if (!gfs2_rsrv_cachep)
+ if (!gfs2_qadata_cachep)
goto fail;
register_shrinker(&gfs2_qd_shrinker);
@@ -193,8 +196,8 @@ fail_lru:
unregister_shrinker(&gfs2_qd_shrinker);
gfs2_glock_exit();
- if (gfs2_rsrv_cachep)
- kmem_cache_destroy(gfs2_rsrv_cachep);
+ if (gfs2_qadata_cachep)
+ kmem_cache_destroy(gfs2_qadata_cachep);
if (gfs2_quotad_cachep)
kmem_cache_destroy(gfs2_quotad_cachep);
@@ -238,7 +241,7 @@ static void __exit exit_gfs2_fs(void)
rcu_barrier();
mempool_destroy(gfs2_page_pool);
- kmem_cache_destroy(gfs2_rsrv_cachep);
+ kmem_cache_destroy(gfs2_qadata_cachep);
kmem_cache_destroy(gfs2_quotad_cachep);
kmem_cache_destroy(gfs2_rgrpd_cachep);
kmem_cache_destroy(gfs2_bufdata_cachep);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 0e1d4be58..e137d96f1 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -187,6 +187,52 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
return bh;
}
+static void gfs2_meta_read_endio(struct bio *bio)
+{
+ struct bio_vec *bvec;
+ int i;
+
+ bio_for_each_segment_all(bvec, bio, i) {
+ struct page *page = bvec->bv_page;
+ struct buffer_head *bh = page_buffers(page);
+ unsigned int len = bvec->bv_len;
+
+ while (bh_offset(bh) < bvec->bv_offset)
+ bh = bh->b_this_page;
+ do {
+ struct buffer_head *next = bh->b_this_page;
+ len -= bh->b_size;
+ bh->b_end_io(bh, !bio->bi_error);
+ bh = next;
+ } while (bh && len);
+ }
+ bio_put(bio);
+}
+
+/*
+ * Submit several consecutive buffer head I/O requests as a single bio I/O
+ * request. (See submit_bh_wbc.)
+ */
+static void gfs2_submit_bhs(int rw, struct buffer_head *bhs[], int num)
+{
+ struct buffer_head *bh = bhs[0];
+ struct bio *bio;
+ int i;
+
+ if (!num)
+ return;
+
+ bio = bio_alloc(GFP_NOIO, num);
+ bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+ bio->bi_bdev = bh->b_bdev;
+ for (i = 0; i < num; i++) {
+ bh = bhs[i];
+ bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
+ }
+ bio->bi_end_io = gfs2_meta_read_endio;
+ submit_bio(rw, bio);
+}
+
/**
* gfs2_meta_read - Read a block from disk
* @gl: The glock covering the block
@@ -198,10 +244,11 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
*/
int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
- struct buffer_head **bhp)
+ int rahead, struct buffer_head **bhp)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- struct buffer_head *bh;
+ struct buffer_head *bh, *bhs[2];
+ int num = 0;
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
*bhp = NULL;
@@ -213,14 +260,31 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
lock_buffer(bh);
if (buffer_uptodate(bh)) {
unlock_buffer(bh);
- return 0;
+ flags &= ~DIO_WAIT;
+ } else {
+ bh->b_end_io = end_buffer_read_sync;
+ get_bh(bh);
+ bhs[num++] = bh;
}
- bh->b_end_io = end_buffer_read_sync;
- get_bh(bh);
- submit_bh(READ_SYNC | REQ_META | REQ_PRIO, bh);
+
+ if (rahead) {
+ bh = gfs2_getbuf(gl, blkno + 1, CREATE);
+
+ lock_buffer(bh);
+ if (buffer_uptodate(bh)) {
+ unlock_buffer(bh);
+ brelse(bh);
+ } else {
+ bh->b_end_io = end_buffer_read_sync;
+ bhs[num++] = bh;
+ }
+ }
+
+ gfs2_submit_bhs(READ_SYNC | REQ_META | REQ_PRIO, bhs, num);
if (!(flags & DIO_WAIT))
return 0;
+ bh = *bhp;
wait_on_buffer(bh);
if (unlikely(!buffer_uptodate(bh))) {
struct gfs2_trans *tr = current->journal_info;
@@ -341,8 +405,12 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
struct buffer_head *bh;
int ret = 0;
u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI;
+ int rahead = 0;
+
+ if (num == ip->i_no_addr)
+ rahead = ip->i_rahead;
- ret = gfs2_meta_read(gl, num, DIO_WAIT, &bh);
+ ret = gfs2_meta_read(gl, num, DIO_WAIT, rahead, &bh);
if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) {
brelse(bh);
ret = -EIO;
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index 8ca161567..c5086c8af 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -53,7 +53,7 @@ static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping)
extern struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno);
extern int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
- struct buffer_head **bhp);
+ int rahead, struct buffer_head **bhp);
extern int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
extern struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno,
int create);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index baab99b69..dbed9e243 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -352,6 +352,9 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
sdp->sd_jheightsize[x] = ~0;
gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);
+ sdp->sd_max_dents_per_leaf = (sdp->sd_sb.sb_bsize -
+ sizeof(struct gfs2_leaf)) /
+ GFS2_MIN_DIRENT_SIZE;
return 0;
}
@@ -910,8 +913,7 @@ fail_qc_i:
fail_ut_i:
iput(sdp->sd_sc_inode);
fail:
- if (pn)
- iput(pn);
+ iput(pn);
return error;
}
@@ -1315,9 +1317,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
if ((flags ^ s->s_flags) & MS_RDONLY)
goto error_super;
} else {
- char b[BDEVNAME_SIZE];
-
- strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
+ snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
sb_set_blocksize(s, block_size(bdev));
error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0);
if (error)
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 3a3122653..a39891344 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -388,7 +388,7 @@ static int bh_get(struct gfs2_quota_data *qd)
error = gfs2_block_map(&ip->i_inode, block, &bh_map, 0);
if (error)
goto fail;
- error = gfs2_meta_read(ip->i_gl, bh_map.b_blocknr, DIO_WAIT, &bh);
+ error = gfs2_meta_read(ip->i_gl, bh_map.b_blocknr, DIO_WAIT, 0, &bh);
if (error)
goto fail;
error = -EIO;
@@ -527,37 +527,70 @@ static void qdsb_put(struct gfs2_quota_data *qd)
qd_put(qd);
}
+/**
+ * gfs2_qa_alloc - make sure we have a quota allocations data structure,
+ * if necessary
+ * @ip: the inode for this reservation
+ */
+int gfs2_qa_alloc(struct gfs2_inode *ip)
+{
+ int error = 0;
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+
+ if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
+ return 0;
+
+ down_write(&ip->i_rw_mutex);
+ if (ip->i_qadata == NULL) {
+ ip->i_qadata = kmem_cache_zalloc(gfs2_qadata_cachep, GFP_NOFS);
+ if (!ip->i_qadata)
+ error = -ENOMEM;
+ }
+ up_write(&ip->i_rw_mutex);
+ return error;
+}
+
+void gfs2_qa_delete(struct gfs2_inode *ip, atomic_t *wcount)
+{
+ down_write(&ip->i_rw_mutex);
+ if (ip->i_qadata && ((wcount == NULL) || (atomic_read(wcount) <= 1))) {
+ kmem_cache_free(gfs2_qadata_cachep, ip->i_qadata);
+ ip->i_qadata = NULL;
+ }
+ up_write(&ip->i_rw_mutex);
+}
+
int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_quota_data **qd;
int error;
- if (ip->i_res == NULL) {
- error = gfs2_rs_alloc(ip);
+ if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
+ return 0;
+
+ if (ip->i_qadata == NULL) {
+ error = gfs2_rsqa_alloc(ip);
if (error)
return error;
}
- qd = ip->i_res->rs_qa_qd;
+ qd = ip->i_qadata->qa_qd;
- if (gfs2_assert_warn(sdp, !ip->i_res->rs_qa_qd_num) ||
+ if (gfs2_assert_warn(sdp, !ip->i_qadata->qa_qd_num) ||
gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
return -EIO;
- if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
- return 0;
-
error = qdsb_get(sdp, make_kqid_uid(ip->i_inode.i_uid), qd);
if (error)
goto out;
- ip->i_res->rs_qa_qd_num++;
+ ip->i_qadata->qa_qd_num++;
qd++;
error = qdsb_get(sdp, make_kqid_gid(ip->i_inode.i_gid), qd);
if (error)
goto out;
- ip->i_res->rs_qa_qd_num++;
+ ip->i_qadata->qa_qd_num++;
qd++;
if (!uid_eq(uid, NO_UID_QUOTA_CHANGE) &&
@@ -565,7 +598,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
error = qdsb_get(sdp, make_kqid_uid(uid), qd);
if (error)
goto out;
- ip->i_res->rs_qa_qd_num++;
+ ip->i_qadata->qa_qd_num++;
qd++;
}
@@ -574,7 +607,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
error = qdsb_get(sdp, make_kqid_gid(gid), qd);
if (error)
goto out;
- ip->i_res->rs_qa_qd_num++;
+ ip->i_qadata->qa_qd_num++;
qd++;
}
@@ -587,17 +620,17 @@ out:
void gfs2_quota_unhold(struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- unsigned int x;
+ u32 x;
- if (ip->i_res == NULL)
+ if (ip->i_qadata == NULL)
return;
gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));
- for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
- qdsb_put(ip->i_res->rs_qa_qd[x]);
- ip->i_res->rs_qa_qd[x] = NULL;
+ for (x = 0; x < ip->i_qadata->qa_qd_num; x++) {
+ qdsb_put(ip->i_qadata->qa_qd[x]);
+ ip->i_qadata->qa_qd[x] = NULL;
}
- ip->i_res->rs_qa_qd_num = 0;
+ ip->i_qadata->qa_qd_num = 0;
}
static int sort_qd(const void *a, const void *b)
@@ -843,7 +876,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
unsigned int nalloc = 0, blocks;
int error;
- error = gfs2_rs_alloc(ip);
+ error = gfs2_rsqa_alloc(ip);
if (error)
return error;
@@ -855,7 +888,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
return -ENOMEM;
sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
- mutex_lock(&ip->i_inode.i_mutex);
+ inode_lock(&ip->i_inode);
for (qx = 0; qx < num_qd; qx++) {
error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE,
GL_NOCACHE, &ghs[qx]);
@@ -920,7 +953,7 @@ out_alloc:
out:
while (qx--)
gfs2_glock_dq_uninit(&ghs[qx]);
- mutex_unlock(&ip->i_inode.i_mutex);
+ inode_unlock(&ip->i_inode);
kfree(ghs);
gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl, NORMAL_FLUSH);
return error;
@@ -1003,23 +1036,23 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_quota_data *qd;
- unsigned int x;
+ u32 x;
int error = 0;
- error = gfs2_quota_hold(ip, uid, gid);
- if (error)
- return error;
-
if (capable(CAP_SYS_RESOURCE) ||
sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
return 0;
- sort(ip->i_res->rs_qa_qd, ip->i_res->rs_qa_qd_num,
+ error = gfs2_quota_hold(ip, uid, gid);
+ if (error)
+ return error;
+
+ sort(ip->i_qadata->qa_qd, ip->i_qadata->qa_qd_num,
sizeof(struct gfs2_quota_data *), sort_qd, NULL);
- for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
- qd = ip->i_res->rs_qa_qd[x];
- error = do_glock(qd, NO_FORCE, &ip->i_res->rs_qa_qd_ghs[x]);
+ for (x = 0; x < ip->i_qadata->qa_qd_num; x++) {
+ qd = ip->i_qadata->qa_qd[x];
+ error = do_glock(qd, NO_FORCE, &ip->i_qadata->qa_qd_ghs[x]);
if (error)
break;
}
@@ -1028,7 +1061,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
set_bit(GIF_QD_LOCKED, &ip->i_flags);
else {
while (x--)
- gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]);
+ gfs2_glock_dq_uninit(&ip->i_qadata->qa_qd_ghs[x]);
gfs2_quota_unhold(ip);
}
@@ -1076,20 +1109,20 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_quota_data *qda[4];
unsigned int count = 0;
- unsigned int x;
+ u32 x;
int found;
if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
goto out;
- for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
+ for (x = 0; x < ip->i_qadata->qa_qd_num; x++) {
struct gfs2_quota_data *qd;
int sync;
- qd = ip->i_res->rs_qa_qd[x];
+ qd = ip->i_qadata->qa_qd[x];
sync = need_sync(qd);
- gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]);
+ gfs2_glock_dq_uninit(&ip->i_qadata->qa_qd_ghs[x]);
if (!sync)
continue;
@@ -1158,7 +1191,7 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_quota_data *qd;
s64 value, warn, limit;
- unsigned int x;
+ u32 x;
int error = 0;
ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */
@@ -1168,8 +1201,8 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
return 0;
- for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
- qd = ip->i_res->rs_qa_qd[x];
+ for (x = 0; x < ip->i_qadata->qa_qd_num; x++) {
+ qd = ip->i_qadata->qa_qd[x];
if (!(qid_eq(qd->qd_id, make_kqid_uid(uid)) ||
qid_eq(qd->qd_id, make_kqid_gid(gid))))
@@ -1216,15 +1249,17 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
kuid_t uid, kgid_t gid)
{
struct gfs2_quota_data *qd;
- unsigned int x;
+ u32 x;
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), change))
+ if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON ||
+ gfs2_assert_warn(sdp, change))
return;
if (ip->i_diskflags & GFS2_DIF_SYSTEM)
return;
- for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
- qd = ip->i_res->rs_qa_qd[x];
+ for (x = 0; x < ip->i_qadata->qa_qd_num; x++) {
+ qd = ip->i_qadata->qa_qd[x];
if (qid_eq(qd->qd_id, make_kqid_uid(uid)) ||
qid_eq(qd->qd_id, make_kqid_gid(gid))) {
@@ -1635,11 +1670,11 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
if (error)
return error;
- error = gfs2_rs_alloc(ip);
+ error = gfs2_rsqa_alloc(ip);
if (error)
goto out_put;
- mutex_lock(&ip->i_inode.i_mutex);
+ inode_lock(&ip->i_inode);
error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh);
if (error)
goto out_unlockput;
@@ -1704,7 +1739,7 @@ out_i:
out_q:
gfs2_glock_dq_uninit(&q_gh);
out_unlockput:
- mutex_unlock(&ip->i_inode.i_mutex);
+ inode_unlock(&ip->i_inode);
out_put:
qd_put(qd);
return error;
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index ad04b3aca..5e47c935a 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -18,6 +18,8 @@ struct gfs2_sbd;
#define NO_UID_QUOTA_CHANGE INVALID_UID
#define NO_GID_QUOTA_CHANGE INVALID_GID
+extern int gfs2_qa_alloc(struct gfs2_inode *ip);
+extern void gfs2_qa_delete(struct gfs2_inode *ip, atomic_t *wcount);
extern int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
extern void gfs2_quota_unhold(struct gfs2_inode *ip);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index c134c0462..07c0265aa 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -596,27 +596,13 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
}
/**
- * gfs2_rs_alloc - make sure we have a reservation assigned to the inode
+ * gfs2_rsqa_alloc - make sure we have a reservation assigned to the inode
+ * plus a quota allocations data structure, if necessary
* @ip: the inode for this reservation
*/
-int gfs2_rs_alloc(struct gfs2_inode *ip)
+int gfs2_rsqa_alloc(struct gfs2_inode *ip)
{
- int error = 0;
-
- down_write(&ip->i_rw_mutex);
- if (ip->i_res)
- goto out;
-
- ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
- if (!ip->i_res) {
- error = -ENOMEM;
- goto out;
- }
-
- RB_CLEAR_NODE(&ip->i_res->rs_node);
-out:
- up_write(&ip->i_rw_mutex);
- return error;
+ return gfs2_qa_alloc(ip);
}
static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
@@ -678,21 +664,20 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
}
/**
- * gfs2_rs_delete - delete a multi-block reservation
+ * gfs2_rsqa_delete - delete a multi-block reservation and quota allocation
* @ip: The inode for this reservation
* @wcount: The inode's write count, or NULL
*
*/
-void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
+void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount)
{
down_write(&ip->i_rw_mutex);
- if (ip->i_res && ((wcount == NULL) || (atomic_read(wcount) <= 1))) {
- gfs2_rs_deltree(ip->i_res);
- BUG_ON(ip->i_res->rs_free);
- kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
- ip->i_res = NULL;
+ if ((wcount == NULL) || (atomic_read(wcount) <= 1)) {
+ gfs2_rs_deltree(&ip->i_res);
+ BUG_ON(ip->i_res.rs_free);
}
up_write(&ip->i_rw_mutex);
+ gfs2_qa_delete(ip, wcount);
}
/**
@@ -1158,7 +1143,7 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
for (x = 0; x < length; x++) {
bi = rgd->rd_bits + x;
- error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
+ error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, 0, &bi->bi_bh);
if (error)
goto fail;
}
@@ -1456,7 +1441,7 @@ static void rs_insert(struct gfs2_inode *ip)
{
struct rb_node **newn, *parent = NULL;
int rc;
- struct gfs2_blkreserv *rs = ip->i_res;
+ struct gfs2_blkreserv *rs = &ip->i_res;
struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);
@@ -1503,7 +1488,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
{
struct gfs2_rbm rbm = { .rgd = rgd, };
u64 goal;
- struct gfs2_blkreserv *rs = ip->i_res;
+ struct gfs2_blkreserv *rs = &ip->i_res;
u32 extlen;
u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
int ret;
@@ -1574,7 +1559,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
}
if (n) {
- while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) {
+ while ((rs_cmp(block, length, rs) == 0) && (&ip->i_res != rs)) {
block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
n = n->rb_right;
if (n == NULL)
@@ -1804,7 +1789,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
continue;
*last_unlinked = block;
- error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl);
+ error = gfs2_glock_get(sdp, block, &gfs2_iopen_glops, CREATE, &gl);
if (error)
continue;
@@ -1984,7 +1969,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *begin = NULL;
- struct gfs2_blkreserv *rs = ip->i_res;
+ struct gfs2_blkreserv *rs = &ip->i_res;
int error = 0, rg_locked, flags = 0;
u64 last_unlinked = NO_BLOCK;
int loops = 0;
@@ -2113,7 +2098,7 @@ next_rgrp:
void gfs2_inplace_release(struct gfs2_inode *ip)
{
- struct gfs2_blkreserv *rs = ip->i_res;
+ struct gfs2_blkreserv *rs = &ip->i_res;
if (rs->rs_rgd_gh.gh_gl)
gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
@@ -2267,7 +2252,7 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
static void gfs2_adjust_reservation(struct gfs2_inode *ip,
const struct gfs2_rbm *rbm, unsigned len)
{
- struct gfs2_blkreserv *rs = ip->i_res;
+ struct gfs2_blkreserv *rs = &ip->i_res;
struct gfs2_rgrpd *rgd = rbm->rgd;
unsigned rlen;
u64 block;
@@ -2310,8 +2295,8 @@ static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
{
u64 goal;
- if (gfs2_rs_active(ip->i_res)) {
- *rbm = ip->i_res->rs_rbm;
+ if (gfs2_rs_active(&ip->i_res)) {
+ *rbm = ip->i_res.rs_rbm;
return;
}
@@ -2365,7 +2350,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
gfs2_alloc_extent(&rbm, dinode, nblocks);
block = gfs2_rbm_to_block(&rbm);
rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
- if (gfs2_rs_active(ip->i_res))
+ if (gfs2_rs_active(&ip->i_res))
gfs2_adjust_reservation(ip, &rbm, *nblocks);
ndata = *nblocks;
if (dinode)
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index c0ab33fa3..66b51cf66 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -49,9 +49,9 @@ extern void gfs2_inplace_release(struct gfs2_inode *ip);
extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
bool dinode, u64 *generation);
-extern int gfs2_rs_alloc(struct gfs2_inode *ip);
+extern int gfs2_rsqa_alloc(struct gfs2_inode *ip);
extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
-extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount);
+extern void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount);
extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
@@ -78,7 +78,7 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
extern int gfs2_fitrim(struct file *filp, void __user *argp);
/* This is how to tell if a reservation is in the rgrp tree: */
-static inline bool gfs2_rs_active(struct gfs2_blkreserv *rs)
+static inline bool gfs2_rs_active(const struct gfs2_blkreserv *rs)
{
return rs && !RB_EMPTY_NODE(&rs->rs_node);
}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 894fb01a9..8f960a51a 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -83,6 +83,8 @@ enum {
Opt_nobarrier,
Opt_rgrplvb,
Opt_norgrplvb,
+ Opt_loccookie,
+ Opt_noloccookie,
Opt_error,
};
@@ -122,6 +124,8 @@ static const match_table_t tokens = {
{Opt_nobarrier, "nobarrier"},
{Opt_rgrplvb, "rgrplvb"},
{Opt_norgrplvb, "norgrplvb"},
+ {Opt_loccookie, "loccookie"},
+ {Opt_noloccookie, "noloccookie"},
{Opt_error, NULL}
};
@@ -278,6 +282,12 @@ int gfs2_mount_args(struct gfs2_args *args, char *options)
case Opt_norgrplvb:
args->ar_rgrplvb = 0;
break;
+ case Opt_loccookie:
+ args->ar_loccookie = 1;
+ break;
+ case Opt_noloccookie:
+ args->ar_loccookie = 0;
+ break;
case Opt_error:
default:
pr_warn("invalid mount option: %s\n", o);
@@ -556,6 +566,7 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
gfs2_trans_add_meta(l_ip->i_gl, l_bh);
+ gfs2_trans_add_meta(m_ip->i_gl, m_bh);
spin_lock(&sdp->sd_statfs_spin);
m_sc->sc_total += l_sc->sc_total;
@@ -564,10 +575,8 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
memset(l_sc, 0, sizeof(struct gfs2_statfs_change));
memset(l_bh->b_data + sizeof(struct gfs2_dinode),
0, sizeof(struct gfs2_statfs_change));
- spin_unlock(&sdp->sd_statfs_spin);
-
- gfs2_trans_add_meta(m_ip->i_gl, m_bh);
gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
+ spin_unlock(&sdp->sd_statfs_spin);
}
int gfs2_statfs_sync(struct super_block *sb, int type)
@@ -842,10 +851,6 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
gfs2_quota_sync(sdp->sd_vfs, 0);
gfs2_statfs_sync(sdp->sd_vfs, 0);
- down_write(&sdp->sd_log_flush_lock);
- clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
- up_write(&sdp->sd_log_flush_lock);
-
gfs2_log_flush(sdp, NULL, SHUTDOWN_FLUSH);
wait_event(sdp->sd_reserving_log_wait, atomic_read(&sdp->sd_reserving_log) == 0);
gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks);
@@ -1419,6 +1424,8 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
seq_puts(s, ",demote_interface_used");
if (args->ar_rgrplvb)
seq_puts(s, ",rgrplvb");
+ if (args->ar_loccookie)
+ seq_puts(s, ",loccookie");
return 0;
}
@@ -1512,6 +1519,7 @@ static void gfs2_evict_inode(struct inode *inode)
struct gfs2_sbd *sdp = sb->s_fs_info;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder gh;
+ struct address_space *metamapping;
int error;
if (test_bit(GIF_FREE_VFS_INODE, &ip->i_flags)) {
@@ -1526,7 +1534,8 @@ static void gfs2_evict_inode(struct inode *inode)
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
if (unlikely(error)) {
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
- gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+ gfs2_glock_dq_wait(&ip->i_iopen_gh);
+ gfs2_holder_uninit(&ip->i_iopen_gh);
goto out;
}
@@ -1575,8 +1584,8 @@ static void gfs2_evict_inode(struct inode *inode)
out_truncate:
gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH);
+ metamapping = gfs2_glock2aspace(ip->i_gl);
if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) {
- struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
filemap_fdatawrite(metamapping);
filemap_fdatawait(metamapping);
}
@@ -1589,16 +1598,17 @@ out_truncate:
goto out_unlock;
/* Needs to be done before glock release & also in a transaction */
truncate_inode_pages(&inode->i_data, 0);
+ truncate_inode_pages(metamapping, 0);
gfs2_trans_end(sdp);
out_unlock:
/* Error path for case 1 */
- if (gfs2_rs_active(ip->i_res))
- gfs2_rs_deltree(ip->i_res);
+ if (gfs2_rs_active(&ip->i_res))
+ gfs2_rs_deltree(&ip->i_res);
if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
- gfs2_glock_dq(&ip->i_iopen_gh);
+ gfs2_glock_dq_wait(&ip->i_iopen_gh);
}
gfs2_holder_uninit(&ip->i_iopen_gh);
gfs2_glock_dq_uninit(&gh);
@@ -1607,7 +1617,7 @@ out_unlock:
out:
/* Case 3 starts here */
truncate_inode_pages_final(&inode->i_data);
- gfs2_rs_delete(ip, NULL);
+ gfs2_rsqa_delete(ip, NULL);
gfs2_ordered_del_inode(ip);
clear_inode(inode);
gfs2_dir_hash_inval(ip);
@@ -1619,7 +1629,8 @@ out:
if (ip->i_iopen_gh.gh_gl) {
ip->i_iopen_gh.gh_gl->gl_object = NULL;
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
- gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+ gfs2_glock_dq_wait(&ip->i_iopen_gh);
+ gfs2_holder_uninit(&ip->i_iopen_gh);
}
}
@@ -1632,7 +1643,9 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
ip->i_flags = 0;
ip->i_gl = NULL;
ip->i_rgd = NULL;
- ip->i_res = NULL;
+ memset(&ip->i_res, 0, sizeof(ip->i_res));
+ RB_CLEAR_NODE(&ip->i_res.rs_node);
+ ip->i_rahead = 0;
}
return &ip->i_inode;
}
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 86d2035ac..cf6458357 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -27,7 +27,7 @@ struct kmem_cache *gfs2_inode_cachep __read_mostly;
struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
struct kmem_cache *gfs2_quotad_cachep __read_mostly;
-struct kmem_cache *gfs2_rsrv_cachep __read_mostly;
+struct kmem_cache *gfs2_qadata_cachep __read_mostly;
mempool_t *gfs2_page_pool __read_mostly;
void gfs2_assert_i(struct gfs2_sbd *sdp)
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index cbdcbdf39..c81295f40 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -149,7 +149,7 @@ extern struct kmem_cache *gfs2_inode_cachep;
extern struct kmem_cache *gfs2_bufdata_cachep;
extern struct kmem_cache *gfs2_rgrpd_cachep;
extern struct kmem_cache *gfs2_quotad_cachep;
-extern struct kmem_cache *gfs2_rsrv_cachep;
+extern struct kmem_cache *gfs2_qadata_cachep;
extern mempool_t *gfs2_page_pool;
static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 53ce76a37..e8dfb4740 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -119,7 +119,7 @@ static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
__be64 *eablk, *end;
int error;
- error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, &bh);
+ error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, 0, &bh);
if (error)
return error;
@@ -143,7 +143,7 @@ static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
break;
bn = be64_to_cpu(*eablk);
- error = gfs2_meta_read(ip->i_gl, bn, DIO_WAIT, &eabh);
+ error = gfs2_meta_read(ip->i_gl, bn, DIO_WAIT, 0, &eabh);
if (error)
break;
error = ea_foreach_i(ip, eabh, ea_call, data);
@@ -477,7 +477,7 @@ static int gfs2_iter_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
return -ENOMEM;
for (x = 0; x < nptrs; x++) {
- error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0,
+ error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0, 0,
bh + x);
if (error) {
while (x--)
@@ -979,7 +979,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) {
__be64 *end;
- error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT,
+ error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, 0,
&indbh);
if (error)
return error;
@@ -1237,56 +1237,6 @@ static int gfs2_xattr_set(const struct xattr_handler *handler,
size, flags, handler->flags);
}
-
-static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
- struct gfs2_ea_header *ea, char *data)
-{
- struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- unsigned int amount = GFS2_EA_DATA_LEN(ea);
- unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
- int ret;
-
- ret = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0);
- if (ret)
- return ret;
-
- ret = gfs2_iter_unstuffed(ip, ea, data, NULL);
- gfs2_trans_end(sdp);
-
- return ret;
-}
-
-int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
-{
- struct inode *inode = &ip->i_inode;
- struct gfs2_sbd *sdp = GFS2_SB(inode);
- struct gfs2_ea_location el;
- int error;
-
- error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el);
- if (error)
- return error;
-
- if (GFS2_EA_IS_STUFFED(el.el_ea)) {
- error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0);
- if (error == 0) {
- gfs2_trans_add_meta(ip->i_gl, el.el_bh);
- memcpy(GFS2_EA2DATA(el.el_ea), data,
- GFS2_EA_DATA_LEN(el.el_ea));
- }
- } else {
- error = ea_acl_chmod_unstuffed(ip, el.el_ea, data);
- }
-
- brelse(el.el_bh);
- if (error)
- return error;
-
- error = gfs2_setattr_simple(inode, attr);
- gfs2_trans_end(sdp);
- return error;
-}
-
static int ea_dealloc_indirect(struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
@@ -1306,7 +1256,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
- error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, &indbh);
+ error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, 0, &indbh);
if (error)
return error;
diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h
index d392f8358..2d887c88e 100644
--- a/fs/gfs2/xattr.h
+++ b/fs/gfs2/xattr.h
@@ -62,6 +62,5 @@ extern int gfs2_ea_dealloc(struct gfs2_inode *ip);
/* Exported to acl.c */
extern int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **data);
-extern int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data);
#endif /* __EATTR_DOT_H__ */
diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c
index db458ee3a..1eb5d415d 100644
--- a/fs/hfs/catalog.c
+++ b/fs/hfs/catalog.c
@@ -214,7 +214,7 @@ int hfs_cat_delete(u32 cnid, struct inode *dir, struct qstr *str)
{
struct super_block *sb;
struct hfs_find_data fd;
- struct list_head *pos;
+ struct hfs_readdir_data *rd;
int res, type;
hfs_dbg(CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid);
@@ -240,9 +240,7 @@ int hfs_cat_delete(u32 cnid, struct inode *dir, struct qstr *str)
}
}
- list_for_each(pos, &HFS_I(dir)->open_dir_list) {
- struct hfs_readdir_data *rd =
- list_entry(pos, struct hfs_readdir_data, list);
+ list_for_each_entry(rd, &HFS_I(dir)->open_dir_list, list) {
if (fd.tree->keycmp(fd.search_key, (void *)&rd->key) < 0)
rd->file->f_pos--;
}
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 70788e038..e9f2b855f 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -173,9 +173,9 @@ static int hfs_dir_release(struct inode *inode, struct file *file)
{
struct hfs_readdir_data *rd = file->private_data;
if (rd) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
list_del(&rd->list);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
kfree(rd);
}
return 0;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index b99ebddb1..6686bf39a 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -570,13 +570,13 @@ static int hfs_file_release(struct inode *inode, struct file *file)
if (HFS_IS_RSRC(inode))
inode = HFS_I(inode)->rsrc_inode;
if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
hfs_file_truncate(inode);
//if (inode->i_flags & S_DEAD) {
// hfs_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL);
// hfs_delete_inode(inode);
//}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return 0;
}
@@ -656,7 +656,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* sync the inode to buffers */
ret = write_inode_now(inode, 0);
@@ -668,7 +668,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
err = sync_blockdev(sb->s_bdev);
if (!ret)
ret = err;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index aa3f0d6d0..a3ec3ae7d 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -166,7 +166,7 @@ int hfs_mdb_get(struct super_block *sb)
pr_warn("continuing without an alternate MDB\n");
}
- HFS_SB(sb)->bitmap = (__be32 *)__get_free_pages(GFP_KERNEL, PAGE_SIZE < 8192 ? 1 : 0);
+ HFS_SB(sb)->bitmap = kmalloc(8192, GFP_KERNEL);
if (!HFS_SB(sb)->bitmap)
goto out;
@@ -360,7 +360,7 @@ void hfs_mdb_put(struct super_block *sb)
unload_nls(HFS_SB(sb)->nls_io);
unload_nls(HFS_SB(sb)->nls_disk);
- free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0);
+ kfree(HFS_SB(sb)->bitmap);
kfree(HFS_SB(sb));
sb->s_fs_info = NULL;
}
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 4574fdd3d..1ca95c232 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -483,8 +483,8 @@ static int __init init_hfs_fs(void)
int err;
hfs_inode_cachep = kmem_cache_create("hfs_inode_cache",
- sizeof(struct hfs_inode_info), 0, SLAB_HWCACHE_ALIGN,
- hfs_init_once);
+ sizeof(struct hfs_inode_info), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, hfs_init_once);
if (!hfs_inode_cachep)
return -ENOMEM;
err = register_filesystem(&hfs_fs_type);
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index d0f39dcbb..a4e867e08 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -284,9 +284,9 @@ static int hfsplus_dir_release(struct inode *inode, struct file *file)
{
struct hfsplus_readdir_data *rd = file->private_data;
if (rd) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
list_del(&rd->list);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
kfree(rd);
}
return 0;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 6dd107d74..1a6394cdb 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -229,14 +229,14 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
if (HFSPLUS_IS_RSRC(inode))
inode = HFSPLUS_I(inode)->rsrc_inode;
if (atomic_dec_and_test(&HFSPLUS_I(inode)->opencnt)) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
hfsplus_file_truncate(inode);
if (inode->i_flags & S_DEAD) {
hfsplus_delete_cat(inode->i_ino,
HFSPLUS_SB(sb)->hidden_dir, NULL);
hfsplus_delete_inode(inode);
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return 0;
}
@@ -286,7 +286,7 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
error = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (error)
return error;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* Sync inode metadata into the catalog and extent trees.
@@ -327,7 +327,7 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags))
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return error;
}
@@ -403,6 +403,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode)
} else if (S_ISLNK(inode->i_mode)) {
sbi->file_count++;
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &hfsplus_aops;
hip->clump_blocks = 1;
} else
@@ -526,6 +527,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
inode->i_mapping->a_ops = &hfsplus_aops;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &hfsplus_aops;
} else {
init_special_inode(inode, inode->i_mode,
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 0624ce4e0..32a49e292 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -93,7 +93,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
goto out_drop_write;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if ((flags & (FS_IMMUTABLE_FL|FS_APPEND_FL)) ||
inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
@@ -126,7 +126,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
mark_inode_dirty(inode);
out_unlock_inode:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
out_drop_write:
mnt_drop_write_file(file);
out:
diff --git a/fs/hfsplus/posix_acl.c b/fs/hfsplus/posix_acl.c
index df0c9af68..afb33eda6 100644
--- a/fs/hfsplus/posix_acl.c
+++ b/fs/hfsplus/posix_acl.c
@@ -21,10 +21,10 @@ struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type)
switch (type) {
case ACL_TYPE_ACCESS:
- xattr_name = POSIX_ACL_XATTR_ACCESS;
+ xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
- xattr_name = POSIX_ACL_XATTR_DEFAULT;
+ xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
return ERR_PTR(-EINVAL);
@@ -66,7 +66,7 @@ int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl,
switch (type) {
case ACL_TYPE_ACCESS:
- xattr_name = POSIX_ACL_XATTR_ACCESS;
+ xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
err = posix_acl_equiv_mode(acl, &inode->i_mode);
if (err < 0)
@@ -76,7 +76,7 @@ int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl,
break;
case ACL_TYPE_DEFAULT:
- xattr_name = POSIX_ACL_XATTR_DEFAULT;
+ xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
if (!S_ISDIR(inode->i_mode))
return acl ? -EACCES : 0;
break;
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 7302d96ae..5d54490a1 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -663,7 +663,7 @@ static int __init init_hfsplus_fs(void)
int err;
hfsplus_inode_cachep = kmem_cache_create("hfsplus_icache",
- HFSPLUS_INODE_SIZE, 0, SLAB_HWCACHE_ALIGN,
+ HFSPLUS_INODE_SIZE, 0, SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT,
hfsplus_init_once);
if (!hfsplus_inode_cachep)
return -ENOMEM;
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index e41a010cd..ab01530b4 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -431,9 +431,6 @@ int hfsplus_setxattr(struct dentry *dentry, const char *name,
char *xattr_name;
int res;
- if (!strcmp(name, ""))
- return -EINVAL;
-
xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
GFP_KERNEL);
if (!xattr_name)
@@ -589,9 +586,6 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
int res;
char *xattr_name;
- if (!strcmp(name, ""))
- return -EINVAL;
-
xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
GFP_KERNEL);
if (!xattr_name)
@@ -853,9 +847,6 @@ static int hfsplus_osx_getxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (!strcmp(name, ""))
- return -EINVAL;
-
/*
* Don't allow retrieving properly prefixed attributes
* by prepending them with "osx."
@@ -876,9 +867,6 @@ static int hfsplus_osx_setxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *buffer, size_t size, int flags)
{
- if (!strcmp(name, ""))
- return -EINVAL;
-
/*
* Don't allow setting properly prefixed attributes
* by prepending them with "osx."
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 5a7b3229b..d1abbee28 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -223,7 +223,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
{
struct hostfs_inode_info *hi;
- hi = kmalloc(sizeof(*hi), GFP_KERNEL);
+ hi = kmalloc(sizeof(*hi), GFP_KERNEL_ACCOUNT);
if (hi == NULL)
return NULL;
hi->fd = -1;
@@ -378,9 +378,9 @@ static int hostfs_fsync(struct file *file, loff_t start, loff_t end,
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = fsync_file(HOSTFS_I(inode)->fd, datasync);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
@@ -890,9 +890,14 @@ static const struct inode_operations hostfs_dir_iops = {
.setattr = hostfs_setattr,
};
-static const char *hostfs_follow_link(struct dentry *dentry, void **cookie)
+static const char *hostfs_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- char *link = __getname();
+ char *link;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+ link = kmalloc(PATH_MAX, GFP_KERNEL);
if (link) {
char *path = dentry_name(dentry);
int err = -ENOMEM;
@@ -903,25 +908,20 @@ static const char *hostfs_follow_link(struct dentry *dentry, void **cookie)
__putname(path);
}
if (err < 0) {
- __putname(link);
+ kfree(link);
return ERR_PTR(err);
}
} else {
return ERR_PTR(-ENOMEM);
}
- return *cookie = link;
-}
-
-static void hostfs_put_link(struct inode *unused, void *cookie)
-{
- __putname(cookie);
+ set_delayed_call(done, kfree_link, link);
+ return link;
}
static const struct inode_operations hostfs_link_iops = {
.readlink = generic_readlink,
- .follow_link = hostfs_follow_link,
- .put_link = hostfs_put_link,
+ .get_link = hostfs_get_link,
};
static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index dc540bfce..e57a53c13 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -33,7 +33,7 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
if (whence == SEEK_DATA || whence == SEEK_HOLE)
return -EINVAL;
- mutex_lock(&i->i_mutex);
+ inode_lock(i);
hpfs_lock(s);
/*pr_info("dir lseek\n");*/
@@ -48,12 +48,12 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
ok:
filp->f_pos = new_off;
hpfs_unlock(s);
- mutex_unlock(&i->i_mutex);
+ inode_unlock(i);
return new_off;
fail:
/*pr_warn("illegal lseek: %016llx\n", new_off);*/
hpfs_unlock(s);
- mutex_unlock(&i->i_mutex);
+ inode_unlock(i);
return -ESPIPE;
}
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 933c73780..1f3c6d762 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -77,6 +77,7 @@ void hpfs_read_inode(struct inode *i)
kfree(ea);
i->i_mode = S_IFLNK | 0777;
i->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(i);
i->i_data.a_ops = &hpfs_symlink_aops;
set_nlink(i, 1);
i->i_size = ea_size;
diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c
index a69bbc1e8..a13692918 100644
--- a/fs/hpfs/map.c
+++ b/fs/hpfs/map.c
@@ -133,7 +133,7 @@ __le32 *hpfs_load_bitmap_directory(struct super_block *s, secno bmp)
void hpfs_load_hotfix_map(struct super_block *s, struct hpfs_spare_block *spareblock)
{
struct quad_buffer_head qbh;
- u32 *directory;
+ __le32 *directory;
u32 n_hotfixes, n_used_hotfixes;
unsigned i;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index bffb908ac..bb8d67e27 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -332,6 +332,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
result->i_blocks = 1;
set_nlink(result, 1);
result->i_size = strlen(symlink);
+ inode_nohighmem(result);
result->i_op = &page_symlink_inode_operations;
result->i_data.a_ops = &hpfs_symlink_aops;
@@ -475,7 +476,7 @@ out:
static int hpfs_symlink_readpage(struct file *file, struct page *page)
{
- char *link = kmap(page);
+ char *link = page_address(page);
struct inode *i = page->mapping->host;
struct fnode *fnode;
struct buffer_head *bh;
@@ -491,14 +492,12 @@ static int hpfs_symlink_readpage(struct file *file, struct page *page)
goto fail;
hpfs_unlock(i->i_sb);
SetPageUptodate(page);
- kunmap(page);
unlock_page(page);
return 0;
fail:
hpfs_unlock(i->i_sb);
SetPageError(page);
- kunmap(page);
unlock_page(page);
return err;
}
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index a56159189..458cf4630 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -261,7 +261,7 @@ static int init_inodecache(void)
hpfs_inode_cachep = kmem_cache_create("hpfs_inode_cache",
sizeof(struct hpfs_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
if (hpfs_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 595ebdb41..e1f465a38 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -4,11 +4,11 @@
* Nadia Yvette Chambers, 2002
*
* Copyright (C) 2002 Linus Torvalds.
+ * License: GPL
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/module.h>
#include <linux/thread_info.h>
#include <asm/current.h>
#include <linux/sched.h> /* remove ASAP */
@@ -141,7 +141,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vma_len = (loff_t)(vma->vm_end - vma->vm_start);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
file_accessed(file);
ret = -ENOMEM;
@@ -157,7 +157,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
if (vma->vm_flags & VM_WRITE && inode->i_size < len)
inode->i_size = len;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
@@ -324,11 +324,48 @@ static void remove_huge_page(struct page *page)
delete_from_page_cache(page);
}
+static void
+hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end)
+{
+ struct vm_area_struct *vma;
+
+ /*
+ * end == 0 indicates that the entire range after
+ * start should be unmapped.
+ */
+ vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
+ unsigned long v_offset;
+ unsigned long v_end;
+
+ /*
+ * Can the expression below overflow on 32-bit arches?
+ * No, because the interval tree returns us only those vmas
+ * which overlap the truncated area starting at pgoff,
+ * and no vma on a 32-bit arch can span beyond the 4GB.
+ */
+ if (vma->vm_pgoff < start)
+ v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
+ else
+ v_offset = 0;
+
+ if (!end)
+ v_end = vma->vm_end;
+ else {
+ v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT)
+ + vma->vm_start;
+ if (v_end > vma->vm_end)
+ v_end = vma->vm_end;
+ }
+
+ unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
+ NULL);
+ }
+}
/*
* remove_inode_hugepages handles two distinct cases: truncation and hole
* punch. There are subtle differences in operation for each case.
-
+ *
* truncation is indicated by end of range being LLONG_MAX
* In this case, we first scan the range and release found pages.
* After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
@@ -379,6 +416,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
for (i = 0; i < pagevec_count(&pvec); ++i) {
struct page *page = pvec.pages[i];
+ bool rsv_on_error;
u32 hash;
/*
@@ -395,37 +433,43 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
mapping, next, 0);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
- lock_page(page);
- if (likely(!page_mapped(page))) {
- bool rsv_on_error = !PagePrivate(page);
- /*
- * We must free the huge page and remove
- * from page cache (remove_huge_page) BEFORE
- * removing the region/reserve map
- * (hugetlb_unreserve_pages). In rare out
- * of memory conditions, removal of the
- * region/reserve map could fail. Before
- * free'ing the page, note PagePrivate which
- * is used in case of error.
- */
- remove_huge_page(page);
- freed++;
- if (!truncate_op) {
- if (unlikely(hugetlb_unreserve_pages(
- inode, next,
- next + 1, 1)))
- hugetlb_fix_reserve_counts(
- inode, rsv_on_error);
- }
- } else {
- /*
- * If page is mapped, it was faulted in after
- * being unmapped. It indicates a race between
- * hole punch and page fault. Do nothing in
- * this case. Getting here in a truncate
- * operation is a bug.
- */
+ /*
+ * If page is mapped, it was faulted in after being
+ * unmapped in caller. Unmap (again) now after taking
+ * the fault mutex. The mutex will prevent faults
+ * until we finish removing the page.
+ *
+ * This race can only happen in the hole punch case.
+ * Getting here in a truncate operation is a bug.
+ */
+ if (unlikely(page_mapped(page))) {
BUG_ON(truncate_op);
+
+ i_mmap_lock_write(mapping);
+ hugetlb_vmdelete_list(&mapping->i_mmap,
+ next * pages_per_huge_page(h),
+ (next + 1) * pages_per_huge_page(h));
+ i_mmap_unlock_write(mapping);
+ }
+
+ lock_page(page);
+ /*
+ * We must free the huge page and remove from page
+ * cache (remove_huge_page) BEFORE removing the
+ * region/reserve map (hugetlb_unreserve_pages). In
+ * rare out of memory conditions, removal of the
+ * region/reserve map could fail. Before free'ing
+ * the page, note PagePrivate which is used in case
+ * of error.
+ */
+ rsv_on_error = !PagePrivate(page);
+ remove_huge_page(page);
+ freed++;
+ if (!truncate_op) {
+ if (unlikely(hugetlb_unreserve_pages(inode,
+ next, next + 1, 1)))
+ hugetlb_fix_reserve_counts(inode,
+ rsv_on_error);
}
unlock_page(page);
@@ -452,44 +496,6 @@ static void hugetlbfs_evict_inode(struct inode *inode)
clear_inode(inode);
}
-static inline void
-hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end)
-{
- struct vm_area_struct *vma;
-
- /*
- * end == 0 indicates that the entire range after
- * start should be unmapped.
- */
- vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
- unsigned long v_offset;
- unsigned long v_end;
-
- /*
- * Can the expression below overflow on 32-bit arches?
- * No, because the interval tree returns us only those vmas
- * which overlap the truncated area starting at pgoff,
- * and no vma on a 32-bit arch can span beyond the 4GB.
- */
- if (vma->vm_pgoff < start)
- v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
- else
- v_offset = 0;
-
- if (!end)
- v_end = vma->vm_end;
- else {
- v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT)
- + vma->vm_start;
- if (v_end > vma->vm_end)
- v_end = vma->vm_end;
- }
-
- unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
- NULL);
- }
-}
-
static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
{
pgoff_t pgoff;
@@ -524,7 +530,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (hole_end > hole_start) {
struct address_space *mapping = inode->i_mapping;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
i_mmap_lock_write(mapping);
if (!RB_EMPTY_ROOT(&mapping->i_mmap))
hugetlb_vmdelete_list(&mapping->i_mmap,
@@ -532,7 +538,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
hole_end >> PAGE_SHIFT);
i_mmap_unlock_write(mapping);
remove_inode_hugepages(inode, hole_start, hole_end);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return 0;
@@ -566,7 +572,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
start = offset >> hpage_shift;
end = (offset + len + hpage_size - 1) >> hpage_shift;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
error = inode_newsize_ok(inode, offset + len);
@@ -653,7 +659,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
i_size_write(inode, offset + len);
inode->i_ctime = CURRENT_TIME;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return error;
}
@@ -711,7 +717,7 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
/*
* Hugetlbfs is not reclaimable; therefore its i_mmap_rwsem will never
* be taken from reclaim -- unlike regular filesystems. This needs an
- * annotation because huge_pmd_share() does an allocation under
+ * annotation because huge_pmd_share() does an allocation under hugetlb's
* i_mmap_rwsem.
*/
static struct lock_class_key hugetlbfs_i_mmap_rwsem_key;
@@ -741,7 +747,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
/*
* The policy is initialized here even if we are creating a
* private inode because initialization simply creates an
- * an empty rb tree and calls spin_lock_init(), later when we
+ * an empty rb tree and calls rwlock_init(), later when we
* call mpol_free_shared_policy() it will just return because
* the rb tree will still be empty.
*/
@@ -763,6 +769,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
break;
case S_IFLNK:
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
break;
}
lockdep_annotate_inode_mutex_key(inode);
@@ -1204,7 +1211,6 @@ static struct file_system_type hugetlbfs_fs_type = {
.mount = hugetlbfs_mount,
.kill_sb = kill_litter_super,
};
-MODULE_ALIAS_FS("hugetlbfs");
static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE];
@@ -1324,7 +1330,7 @@ static int __init init_hugetlbfs_fs(void)
error = -ENOMEM;
hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache",
sizeof(struct hugetlbfs_inode_info),
- 0, 0, init_once);
+ 0, SLAB_ACCOUNT, init_once);
if (hugetlbfs_inode_cachep == NULL)
goto out2;
@@ -1358,26 +1364,4 @@ static int __init init_hugetlbfs_fs(void)
out2:
return error;
}
-
-static void __exit exit_hugetlbfs_fs(void)
-{
- struct hstate *h;
- int i;
-
-
- /*
- * Make sure all delayed rcu free inodes are flushed before we
- * destroy cache.
- */
- rcu_barrier();
- kmem_cache_destroy(hugetlbfs_inode_cachep);
- i = 0;
- for_each_hstate(h)
- kern_unmount(hugetlbfs_vfsmount[i++]);
- unregister_filesystem(&hugetlbfs_fs_type);
-}
-
-module_init(init_hugetlbfs_fs)
-module_exit(exit_hugetlbfs_fs)
-
-MODULE_LICENSE("GPL");
+fs_initcall(init_hugetlbfs_fs)
diff --git a/fs/inode.c b/fs/inode.c
index 1be5f9003..69b8b526c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -154,6 +154,12 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_rdev = 0;
inode->dirtied_when = 0;
+#ifdef CONFIG_CGROUP_WRITEBACK
+ inode->i_wb_frn_winner = 0;
+ inode->i_wb_frn_avg_time = 0;
+ inode->i_wb_frn_history = 0;
+#endif
+
if (security_inode_alloc(inode))
goto out;
spin_lock_init(&inode->i_lock);
@@ -225,7 +231,7 @@ void __destroy_inode(struct inode *inode)
inode_detach_wb(inode);
security_inode_free(inode);
fsnotify_inode_delete(inode);
- locks_free_lock_context(inode->i_flctx);
+ locks_free_lock_context(inode);
if (!inode->i_nlink) {
WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
atomic_long_dec(&inode->i_sb->s_remove_count);
@@ -495,7 +501,7 @@ void clear_inode(struct inode *inode)
*/
spin_lock_irq(&inode->i_data.tree_lock);
BUG_ON(inode->i_data.nrpages);
- BUG_ON(inode->i_data.nrshadows);
+ BUG_ON(inode->i_data.nrexceptional);
spin_unlock_irq(&inode->i_data.tree_lock);
BUG_ON(!list_empty(&inode->i_data.private_list));
BUG_ON(!(inode->i_state & I_FREEING));
@@ -966,9 +972,9 @@ void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
swap(inode1, inode2);
if (inode1 && !S_ISDIR(inode1->i_mode))
- mutex_lock(&inode1->i_mutex);
+ inode_lock(inode1);
if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
- mutex_lock_nested(&inode2->i_mutex, I_MUTEX_NONDIR2);
+ inode_lock_nested(inode2, I_MUTEX_NONDIR2);
}
EXPORT_SYMBOL(lock_two_nondirectories);
@@ -980,9 +986,9 @@ EXPORT_SYMBOL(lock_two_nondirectories);
void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
{
if (inode1 && !S_ISDIR(inode1->i_mode))
- mutex_unlock(&inode1->i_mutex);
+ inode_unlock(inode1);
if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
- mutex_unlock(&inode2->i_mutex);
+ inode_unlock(inode2);
}
EXPORT_SYMBOL(unlock_two_nondirectories);
@@ -1883,7 +1889,7 @@ void __init inode_init(void)
sizeof(struct inode),
0,
(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
/* Hash may have been set up in inode_init_early */
@@ -2028,3 +2034,9 @@ void inode_set_flags(struct inode *inode, unsigned int flags,
new_flags) != old_flags));
}
EXPORT_SYMBOL(inode_set_flags);
+
+void inode_nohighmem(struct inode *inode)
+{
+ mapping_set_gfp_mask(inode->i_mapping, GFP_USER);
+}
+EXPORT_SYMBOL(inode_nohighmem);
diff --git a/fs/internal.h b/fs/internal.h
index 71859c4d0..b71deeece 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -55,7 +55,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
/*
* namespace.c
*/
-extern int copy_mount_options(const void __user *, unsigned long *);
+extern void *copy_mount_options(const void __user *);
extern char *copy_mount_string(const void __user *);
extern struct vfsmount *lookup_mnt(struct path *);
@@ -151,3 +151,10 @@ extern void mnt_pin_kill(struct mount *m);
* fs/nsfs.c
*/
extern struct dentry_operations ns_dentry_operations;
+
+/*
+ * fs/ioctl.c
+ */
+extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
+ unsigned long arg);
+extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 5d01d2638..116a333e9 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -15,6 +15,7 @@
#include <linux/writeback.h>
#include <linux/buffer_head.h>
#include <linux/falloc.h>
+#include "internal.h"
#include <asm/ioctls.h>
@@ -32,8 +33,7 @@
*
* Returns 0 on success, -errno on error.
*/
-static long vfs_ioctl(struct file *filp, unsigned int cmd,
- unsigned long arg)
+long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
int error = -ENOTTY;
@@ -215,6 +215,29 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
return error;
}
+static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
+ u64 off, u64 olen, u64 destoff)
+{
+ struct fd src_file = fdget(srcfd);
+ int ret;
+
+ if (!src_file.file)
+ return -EBADF;
+ ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen);
+ fdput(src_file);
+ return ret;
+}
+
+static long ioctl_file_clone_range(struct file *file, void __user *argp)
+{
+ struct file_clone_range args;
+
+ if (copy_from_user(&args, argp, sizeof(args)))
+ return -EFAULT;
+ return ioctl_file_clone(file, args.src_fd, args.src_offset,
+ args.src_length, args.dest_offset);
+}
+
#ifdef CONFIG_BLOCK
static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
@@ -411,9 +434,9 @@ int generic_block_fiemap(struct inode *inode,
u64 len, get_block_t *get_block)
{
int ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
EXPORT_SYMBOL(generic_block_fiemap);
@@ -545,6 +568,41 @@ static int ioctl_fsthaw(struct file *filp)
return thaw_super(sb);
}
+static long ioctl_file_dedupe_range(struct file *file, void __user *arg)
+{
+ struct file_dedupe_range __user *argp = arg;
+ struct file_dedupe_range *same = NULL;
+ int ret;
+ unsigned long size;
+ u16 count;
+
+ if (get_user(count, &argp->dest_count)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ size = offsetof(struct file_dedupe_range __user, info[count]);
+
+ same = memdup_user(argp, size);
+ if (IS_ERR(same)) {
+ ret = PTR_ERR(same);
+ same = NULL;
+ goto out;
+ }
+
+ ret = vfs_dedupe_file_range(file, same);
+ if (ret)
+ goto out;
+
+ ret = copy_to_user(argp, same, size);
+ if (ret)
+ ret = -EFAULT;
+
+out:
+ kfree(same);
+ return ret;
+}
+
/*
* When you add any new common ioctls to the switches above and below
* please update compat_sys_ioctl() too.
@@ -600,6 +658,15 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
case FIGETBSZ:
return put_user(inode->i_sb->s_blocksize, argp);
+ case FICLONE:
+ return ioctl_file_clone(filp, arg, 0, 0, 0);
+
+ case FICLONERANGE:
+ return ioctl_file_clone_range(filp, argp);
+
+ case FIDEDUPERANGE:
+ return ioctl_file_dedupe_range(filp, argp);
+
default:
if (S_ISREG(inode->i_mode))
error = file_ioctl(filp, cmd, arg);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index d67a16f2a..bcd2d41b3 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -94,7 +94,7 @@ static int __init init_inodecache(void)
isofs_inode_cachep = kmem_cache_create("isofs_inode_cache",
sizeof(struct iso_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
if (isofs_inode_cachep == NULL)
return -ENOMEM;
@@ -1417,6 +1417,7 @@ static int isofs_read_inode(struct inode *inode, int relocated)
inode->i_fop = &isofs_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_data.a_ops = &isofs_symlink_aops;
} else
/* XXX - parse_rock_ridge_inode() had already set i_rdev. */
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 735d7522a..5384ceb35 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -687,7 +687,7 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
struct inode *inode = page->mapping->host;
struct iso_inode_info *ei = ISOFS_I(inode);
struct isofs_sb_info *sbi = ISOFS_SB(inode->i_sb);
- char *link = kmap(page);
+ char *link = page_address(page);
unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
struct buffer_head *bh;
char *rpnt = link;
@@ -774,7 +774,6 @@ repeat:
brelse(bh);
*rpnt = '\0';
SetPageUptodate(page);
- kunmap(page);
unlock_page(page);
return 0;
@@ -791,7 +790,6 @@ fail:
brelse(bh);
error:
SetPageError(page);
- kunmap(page);
unlock_page(page);
return -EIO;
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index ca181e81c..081dff087 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -764,13 +764,11 @@ void jbd2_journal_unlock_updates (journal_t *journal)
static void warn_dirty_buffer(struct buffer_head *bh)
{
- char b[BDEVNAME_SIZE];
-
printk(KERN_WARNING
- "JBD2: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
+ "JBD2: Spotted dirty metadata buffer (dev = %pg, blocknr = %llu). "
"There's a risk of filesystem corruption in case of system "
"crash.\n",
- bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
+ bh->b_bdev, (unsigned long long)bh->b_blocknr);
}
/* Call t_frozen trigger and copy buffer data into jh->b_frozen_data. */
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index c1f04947d..b288c8ae1 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mtd/mtd.h>
+#include <linux/mm.h> /* kvfree() */
#include "nodelist.h"
static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *,
@@ -422,12 +423,7 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c)
return 0;
out_free:
-#ifndef __ECOS
- if (jffs2_blocks_use_vmalloc(c))
- vfree(c->blocks);
- else
-#endif
- kfree(c->blocks);
+ kvfree(c->blocks);
return ret;
}
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index d211b8e18..30c4c9ebb 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -843,9 +843,14 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
pr_notice("%s(): Link succeeded, unlink failed (err %d). You now have a hard link\n",
__func__, ret);
- /* Might as well let the VFS know */
- d_instantiate(new_dentry, d_inode(old_dentry));
- ihold(d_inode(old_dentry));
+ /*
+ * We can't keep the target in dcache after that.
+ * For one thing, we can't afford dentry aliases for directories.
+ * For another, if there was a victim, we _can't_ set new inode
+ * for that sucker and we have to trigger mount eviction - the
+ * caller won't do it on its own since we are returning an error.
+ */
+ d_invalidate(new_dentry);
new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now);
return ret;
}
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 3361979d7..cad86bac3 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -39,10 +39,10 @@ int jffs2_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* Trigger GC to flush any pending writes for this inode */
jffs2_flush_wbuf_gc(c, inode->i_ino);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return 0;
}
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 2caf16820..bead25ae8 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -596,10 +596,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
out_root:
jffs2_free_ino_caches(c);
jffs2_free_raw_node_refs(c);
- if (jffs2_blocks_use_vmalloc(c))
- vfree(c->blocks);
- else
- kfree(c->blocks);
+ kvfree(c->blocks);
out_inohash:
jffs2_clear_xattr_subsystem(c);
kfree(c->inocache_list);
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index bf12fe5f8..7a28facd7 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -52,9 +52,6 @@ static int jffs2_security_getxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (!strcmp(name, ""))
- return -EINVAL;
-
return do_jffs2_getxattr(d_inode(dentry), JFFS2_XPREFIX_SECURITY,
name, buffer, size);
}
@@ -63,31 +60,12 @@ static int jffs2_security_setxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *buffer, size_t size, int flags)
{
- if (!strcmp(name, ""))
- return -EINVAL;
-
return do_jffs2_setxattr(d_inode(dentry), JFFS2_XPREFIX_SECURITY,
name, buffer, size, flags);
}
-static size_t jffs2_security_listxattr(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_size, const char *name,
- size_t name_len)
-{
- size_t retlen = XATTR_SECURITY_PREFIX_LEN + name_len + 1;
-
- if (list && retlen <= list_size) {
- strcpy(list, XATTR_SECURITY_PREFIX);
- strcpy(list + XATTR_SECURITY_PREFIX_LEN, name);
- }
-
- return retlen;
-}
-
const struct xattr_handler jffs2_security_xattr_handler = {
.prefix = XATTR_SECURITY_PREFIX,
- .list = jffs2_security_listxattr,
.set = jffs2_security_setxattr,
.get = jffs2_security_getxattr
};
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index d86c5e317..0a9a114bb 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -331,10 +331,7 @@ static void jffs2_put_super (struct super_block *sb)
jffs2_free_ino_caches(c);
jffs2_free_raw_node_refs(c);
- if (jffs2_blocks_use_vmalloc(c))
- vfree(c->blocks);
- else
- kfree(c->blocks);
+ kvfree(c->blocks);
jffs2_flash_cleanup(c);
kfree(c->inocache_list);
jffs2_clear_xattr_subsystem(c);
@@ -387,7 +384,7 @@ static int __init init_jffs2_fs(void)
jffs2_inode_cachep = kmem_cache_create("jffs2_i",
sizeof(struct jffs2_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
jffs2_i_init_once);
if (!jffs2_inode_cachep) {
pr_err("error: Failed to initialise inode cache\n");
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c
index 8ce2f2401..2cabd649d 100644
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -14,7 +14,7 @@
const struct inode_operations jffs2_symlink_inode_operations =
{
.readlink = generic_readlink,
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
.setattr = jffs2_setattr,
.setxattr = jffs2_setxattr,
.getxattr = jffs2_getxattr,
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index f3a4857ff..5a3da3f52 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1153,7 +1153,7 @@ static struct jffs2_sb_info *work_to_sb(struct work_struct *work)
{
struct delayed_work *dwork;
- dwork = container_of(work, struct delayed_work, work);
+ dwork = to_delayed_work(work);
return container_of(dwork, struct jffs2_sb_info, wbuf_dwork);
}
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 4c2c03663..da3e18503 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -967,7 +967,8 @@ ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
struct jffs2_xattr_ref *ref, **pref;
struct jffs2_xattr_datum *xd;
const struct xattr_handler *xhandle;
- ssize_t len, rc;
+ const char *prefix;
+ ssize_t prefix_len, len, rc;
int retry = 0;
rc = check_xattr_ref_inode(c, ic);
@@ -998,18 +999,23 @@ ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
}
}
xhandle = xprefix_to_handler(xd->xprefix);
- if (!xhandle)
+ if (!xhandle || (xhandle->list && !xhandle->list(dentry)))
continue;
+ prefix = xhandle->prefix ?: xhandle->name;
+ prefix_len = strlen(prefix);
+ rc = prefix_len + xd->name_len + 1;
+
if (buffer) {
- rc = xhandle->list(xhandle, dentry, buffer + len,
- size - len, xd->xname,
- xd->name_len);
- } else {
- rc = xhandle->list(xhandle, dentry, NULL, 0,
- xd->xname, xd->name_len);
+ if (rc > size - len) {
+ rc = -ERANGE;
+ goto out;
+ }
+ memcpy(buffer, prefix, prefix_len);
+ buffer += prefix_len;
+ memcpy(buffer, xd->xname, xd->name_len);
+ buffer += xd->name_len;
+ *buffer++ = 0;
}
- if (rc < 0)
- goto out;
len += rc;
}
rc = len;
diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c
index a562da0d6..b2555ef07 100644
--- a/fs/jffs2/xattr_trusted.c
+++ b/fs/jffs2/xattr_trusted.c
@@ -20,8 +20,6 @@ static int jffs2_trusted_getxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (!strcmp(name, ""))
- return -EINVAL;
return do_jffs2_getxattr(d_inode(dentry), JFFS2_XPREFIX_TRUSTED,
name, buffer, size);
}
@@ -30,28 +28,13 @@ static int jffs2_trusted_setxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *buffer, size_t size, int flags)
{
- if (!strcmp(name, ""))
- return -EINVAL;
return do_jffs2_setxattr(d_inode(dentry), JFFS2_XPREFIX_TRUSTED,
name, buffer, size, flags);
}
-static size_t jffs2_trusted_listxattr(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_size, const char *name,
- size_t name_len)
+static bool jffs2_trusted_listxattr(struct dentry *dentry)
{
- size_t retlen = XATTR_TRUSTED_PREFIX_LEN + name_len + 1;
-
- if (!capable(CAP_SYS_ADMIN))
- return 0;
-
- if (list && retlen<=list_size) {
- strcpy(list, XATTR_TRUSTED_PREFIX);
- strcpy(list + XATTR_TRUSTED_PREFIX_LEN, name);
- }
-
- return retlen;
+ return capable(CAP_SYS_ADMIN);
}
const struct xattr_handler jffs2_trusted_xattr_handler = {
diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c
index cbc0472e5..539bd630b 100644
--- a/fs/jffs2/xattr_user.c
+++ b/fs/jffs2/xattr_user.c
@@ -20,8 +20,6 @@ static int jffs2_user_getxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (!strcmp(name, ""))
- return -EINVAL;
return do_jffs2_getxattr(d_inode(dentry), JFFS2_XPREFIX_USER,
name, buffer, size);
}
@@ -30,30 +28,12 @@ static int jffs2_user_setxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *buffer, size_t size, int flags)
{
- if (!strcmp(name, ""))
- return -EINVAL;
return do_jffs2_setxattr(d_inode(dentry), JFFS2_XPREFIX_USER,
name, buffer, size, flags);
}
-static size_t jffs2_user_listxattr(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_size, const char *name,
- size_t name_len)
-{
- size_t retlen = XATTR_USER_PREFIX_LEN + name_len + 1;
-
- if (list && retlen <= list_size) {
- strcpy(list, XATTR_USER_PREFIX);
- strcpy(list + XATTR_USER_PREFIX_LEN, name);
- }
-
- return retlen;
-}
-
const struct xattr_handler jffs2_user_xattr_handler = {
.prefix = XATTR_USER_PREFIX,
- .list = jffs2_user_listxattr,
.set = jffs2_user_setxattr,
.get = jffs2_user_getxattr
};
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 0c8ca830b..49456853e 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -40,10 +40,10 @@ struct posix_acl *jfs_get_acl(struct inode *inode, int type)
switch(type) {
case ACL_TYPE_ACCESS:
- ea_name = POSIX_ACL_XATTR_ACCESS;
+ ea_name = XATTR_NAME_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
- ea_name = POSIX_ACL_XATTR_DEFAULT;
+ ea_name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
return ERR_PTR(-EINVAL);
@@ -82,7 +82,7 @@ static int __jfs_set_acl(tid_t tid, struct inode *inode, int type,
switch (type) {
case ACL_TYPE_ACCESS:
- ea_name = POSIX_ACL_XATTR_ACCESS;
+ ea_name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
rc = posix_acl_equiv_mode(acl, &inode->i_mode);
if (rc < 0)
@@ -94,7 +94,7 @@ static int __jfs_set_acl(tid_t tid, struct inode *inode, int type,
}
break;
case ACL_TYPE_DEFAULT:
- ea_name = POSIX_ACL_XATTR_DEFAULT;
+ ea_name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
return -EINVAL;
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 0e026a7bd..4ce7735dd 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -38,17 +38,17 @@ int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
if (rc)
return rc;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (!(inode->i_state & I_DIRTY_ALL) ||
(datasync && !(inode->i_state & I_DIRTY_DATASYNC))) {
/* Make sure committed changes hit the disk */
jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return rc;
}
rc |= jfs_commit_inode(inode, 1);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return rc ? -EIO : 0;
}
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 41aa3ca6a..9d9bae63a 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -60,6 +60,7 @@ struct inode *jfs_iget(struct super_block *sb, unsigned long ino)
} else if (S_ISLNK(inode->i_mode)) {
if (inode->i_size >= IDATASIZE) {
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &jfs_aops;
} else {
inode->i_op = &jfs_fast_symlink_inode_operations;
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index 8db8b7d61..8653cac7e 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -96,7 +96,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
/* Lock against other parallel changes of flags */
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
jfs_get_inode_flags(jfs_inode);
oldflags = jfs_inode->mode2;
@@ -109,7 +109,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
((flags ^ oldflags) &
(JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
if (!capable(CAP_LINUX_IMMUTABLE)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
err = -EPERM;
goto setflags_out;
}
@@ -120,7 +120,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
jfs_inode->mode2 = flags;
jfs_set_inode_flags(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
setflags_out:
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index a69bdf2a1..a270cb7ff 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1835,17 +1835,16 @@ static int lbmLogInit(struct jfs_log * log)
for (i = 0; i < LOGPAGES;) {
char *buffer;
uint offset;
- struct page *page;
+ struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
- buffer = (char *) get_zeroed_page(GFP_KERNEL);
- if (buffer == NULL)
+ if (!page)
goto error;
- page = virt_to_page(buffer);
+ buffer = page_address(page);
for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
if (lbuf == NULL) {
if (offset == 0)
- free_page((unsigned long) buffer);
+ __free_page(page);
goto error;
}
if (offset) /* we already have one reference */
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 9d7551f5c..701f89370 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -983,6 +983,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
jfs_info("jfs_symlink: allocate extent ip:0x%p", ip);
ip->i_op = &jfs_symlink_inode_operations;
+ inode_nohighmem(ip);
ip->i_mapping->a_ops = &jfs_aops;
/*
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 8f9176caf..4f5d85ba8 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -792,7 +792,7 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type,
struct buffer_head tmp_bh;
struct buffer_head *bh;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
while (towrite > 0) {
tocopy = sb->s_blocksize - offset < towrite ?
sb->s_blocksize - offset : towrite;
@@ -824,7 +824,7 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type,
}
out:
if (len == towrite) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
if (inode->i_size < off+len-towrite)
@@ -832,7 +832,7 @@ out:
inode->i_version++;
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return len - towrite;
}
@@ -898,7 +898,7 @@ static int __init init_jfs_fs(void)
jfs_inode_cachep =
kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0,
- SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+ SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT,
init_once);
if (jfs_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c
index 5929e2363..f8db4fde0 100644
--- a/fs/jfs/symlink.c
+++ b/fs/jfs/symlink.c
@@ -23,7 +23,7 @@
const struct inode_operations jfs_fast_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
.setattr = jfs_setattr,
.setxattr = jfs_setxattr,
.getxattr = jfs_getxattr,
@@ -33,8 +33,7 @@ const struct inode_operations jfs_fast_symlink_inode_operations = {
const struct inode_operations jfs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.setattr = jfs_setattr,
.setxattr = jfs_setxattr,
.getxattr = jfs_getxattr,
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 91e004518..996b7742c 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -541,14 +541,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
if (!kn)
goto err_out1;
- /*
- * If the ino of the sysfs entry created for a kmem cache gets
- * allocated from an ida layer, which is accounted to the memcg that
- * owns the cache, the memcg will get pinned forever. So do not account
- * ino ida allocations.
- */
- ret = ida_simple_get(&root->ino_ida, 1, 0,
- GFP_KERNEL | __GFP_NOACCOUNT);
+ ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
if (ret < 0)
goto err_out2;
kn->ino = ret;
@@ -694,6 +687,29 @@ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
return NULL;
}
+static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
+ const unsigned char *path,
+ const void *ns)
+{ /* resolve slash-separated @path below @parent; no reference is taken here */
+ static char path_buf[PATH_MAX]; /* protected by kernfs_mutex */
+ size_t len = strlcpy(path_buf, path, PATH_MAX); /* strlcpy() returns the length of @path */
+ char *p = path_buf;
+ char *name;
+
+ lockdep_assert_held(&kernfs_mutex);
+
+ if (len >= PATH_MAX) /* @path did not fit in path_buf: report as not found */
+ return NULL;
+
+ while ((name = strsep(&p, "/")) && parent) { /* ends when components run out or parent is NULL */
+ if (*name == '\0') /* empty component, e.g. from a leading or doubled slash */
+ continue;
+ parent = kernfs_find_ns(parent, name, ns);
+ }
+
+ return parent; /* node reached by the walk, or NULL if a lookup failed */
+}
+
/**
* kernfs_find_and_get_ns - find and get kernfs_node with the given name
* @parent: kernfs_node to search under
@@ -719,6 +735,29 @@ struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
/**
+ * kernfs_walk_and_get_ns - find and get kernfs_node with the given path
+ * @parent: kernfs_node to search under
+ * @path: path to look for
+ * @ns: the namespace tag to use
+ *
+ * Look for kernfs_node with path @path under @parent and get a reference
+ * if found. This function may sleep and returns pointer to the found
+ * kernfs_node on success, %NULL on failure.
+ */
+struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent,
+ const char *path, const void *ns)
+{
+ struct kernfs_node *kn;
+
+ mutex_lock(&kernfs_mutex); /* kernfs_walk_ns() asserts that this mutex is held */
+ kn = kernfs_walk_ns(parent, path, ns); /* NOTE(review): callee declares @path as const unsigned char *, so signedness differs here */
+ kernfs_get(kn); /* NOTE(review): kn is NULL when the walk fails; presumably kernfs_get() accepts NULL - confirm */
+ mutex_unlock(&kernfs_mutex);
+
+ return kn;
+}
+
+/**
* kernfs_create_root - create a new kernfs hierarchy
* @scops: optional syscall operations for the hierarchy
* @flags: KERNFS_ROOT_* flags
@@ -1472,9 +1511,9 @@ static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset,
struct inode *inode = file_inode(file);
loff_t ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = generic_file_llseek(file, offset, whence);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index 756dd56aa..16405ae88 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -205,7 +205,7 @@ int kernfs_iop_removexattr(struct dentry *dentry, const char *name)
if (!attrs)
return -ENOMEM;
- return simple_xattr_remove(&attrs->xattrs, name);
+ return simple_xattr_set(&attrs->xattrs, name, NULL, 0, XATTR_REPLACE);
}
ssize_t kernfs_iop_getxattr(struct dentry *dentry, const char *name, void *buf,
@@ -230,7 +230,7 @@ ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size)
if (!attrs)
return -ENOMEM;
- return simple_xattr_list(&attrs->xattrs, buf, size);
+ return simple_xattr_list(d_inode(dentry), &attrs->xattrs, buf, size);
}
static inline void set_default_inode_attr(struct inode *inode, umode_t mode)
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index db272528a..117b8b341 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -112,18 +112,25 @@ static int kernfs_getlink(struct dentry *dentry, char *path)
return error;
}
-static const char *kernfs_iop_follow_link(struct dentry *dentry, void **cookie)
+static const char *kernfs_iop_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- int error = -ENOMEM;
- unsigned long page = get_zeroed_page(GFP_KERNEL);
- if (!page)
+ char *body; /* PAGE_SIZE buffer that receives the link target */
+ int error;
+
+ if (!dentry) /* no dentry supplied; presumably the RCU-walk case - confirm */
+ return ERR_PTR(-ECHILD);
+ body = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!body)
return ERR_PTR(-ENOMEM);
- error = kernfs_getlink(dentry, (char *)page);
+ error = kernfs_getlink(dentry, body);
if (unlikely(error < 0)) {
- free_page((unsigned long)page);
+ kfree(body); /* nothing was registered on @done yet, so free directly */
return ERR_PTR(error);
}
- return *cookie = (char *)page;
+ set_delayed_call(done, kfree_link, body); /* register kfree_link(body) on @done; takes over from the old put_link cookie */
+ return body;
}
const struct inode_operations kernfs_symlink_iops = {
@@ -132,8 +139,7 @@ const struct inode_operations kernfs_symlink_iops = {
.getxattr = kernfs_iop_getxattr,
.listxattr = kernfs_iop_listxattr,
.readlink = generic_readlink,
- .follow_link = kernfs_iop_follow_link,
- .put_link = free_page_put_link,
+ .get_link = kernfs_iop_get_link,
.setattr = kernfs_iop_setattr,
.getattr = kernfs_iop_getattr,
.permission = kernfs_iop_permission,
diff --git a/fs/libfs.c b/fs/libfs.c
index c7cbfb092..0ca80b2af 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -89,7 +89,7 @@ EXPORT_SYMBOL(dcache_dir_close);
loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
{
struct dentry *dentry = file->f_path.dentry;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
switch (whence) {
case 1:
offset += file->f_pos;
@@ -97,7 +97,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
if (offset >= 0)
break;
default:
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
return -EINVAL;
}
if (offset != file->f_pos) {
@@ -124,7 +124,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
spin_unlock(&dentry->d_lock);
}
}
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
return offset;
}
EXPORT_SYMBOL(dcache_dir_lseek);
@@ -941,7 +941,7 @@ int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
if (err)
return err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = sync_mapping_buffers(inode->i_mapping);
if (!(inode->i_state & I_DIRTY_ALL))
goto out;
@@ -953,7 +953,7 @@ int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
ret = err;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
EXPORT_SYMBOL(__generic_file_fsync);
@@ -1019,17 +1019,12 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
}
EXPORT_SYMBOL(noop_fsync);
-void kfree_put_link(struct inode *unused, void *cookie)
+/* Because kfree isn't assignment-compatible with void(void*) ;-/ */
+void kfree_link(void *p)
{
- kfree(cookie);
+ kfree(p);
}
-EXPORT_SYMBOL(kfree_put_link);
-
-void free_page_put_link(struct inode *unused, void *cookie)
-{
- free_page((unsigned long) cookie);
-}
-EXPORT_SYMBOL(free_page_put_link);
+EXPORT_SYMBOL(kfree_link);
/*
* nop .set_page_dirty method so that people can use .page_mkwrite on
@@ -1092,14 +1087,15 @@ simple_nosetlease(struct file *filp, long arg, struct file_lock **flp,
}
EXPORT_SYMBOL(simple_nosetlease);
-const char *simple_follow_link(struct dentry *dentry, void **cookie)
+const char *simple_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *done)
{
- return d_inode(dentry)->i_link;
+ return inode->i_link;
}
-EXPORT_SYMBOL(simple_follow_link);
+EXPORT_SYMBOL(simple_get_link);
const struct inode_operations simple_symlink_inode_operations = {
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
.readlink = generic_readlink
};
EXPORT_SYMBOL(simple_symlink_inode_operations);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 5f31ebd96..154a107cd 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -25,13 +25,17 @@
#include <linux/mutex.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
+#include <linux/inetdevice.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/svc_xprt.h>
#include <net/ip.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
#include <linux/lockd/lockd.h>
#include <linux/nfs.h>
@@ -44,7 +48,7 @@
static struct svc_program nlmsvc_program;
-struct nlmsvc_binding * nlmsvc_ops;
+const struct nlmsvc_binding *nlmsvc_ops;
EXPORT_SYMBOL_GPL(nlmsvc_ops);
static DEFINE_MUTEX(nlmsvc_mutex);
@@ -90,8 +94,7 @@ static unsigned long get_lockd_grace_period(void)
static void grace_ender(struct work_struct *grace)
{
- struct delayed_work *dwork = container_of(grace, struct delayed_work,
- work);
+ struct delayed_work *dwork = to_delayed_work(grace);
struct lockd_net *ln = container_of(dwork, struct lockd_net,
grace_period_end);
@@ -279,6 +282,68 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net)
}
}
+static int lockd_inetaddr_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{ /* IPv4 address notifier callback: reacts only to NETDEV_DOWN */
+ struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
+ struct sockaddr_in sin; /* only sin_family and sin_addr are assigned below */
+
+ if (event != NETDEV_DOWN) /* every other event is ignored */
+ goto out;
+
+ if (nlmsvc_rqst) { /* skipped when nlmsvc_rqst is NULL */
+ dprintk("lockd_inetaddr_event: removed %pI4\n",
+ &ifa->ifa_local);
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = ifa->ifa_local;
+ svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,
+ (struct sockaddr *)&sin); /* presumably ages out temporary transports using this address - confirm in sunrpc */
+ }
+
+out:
+ return NOTIFY_DONE; /* same return value on every path */
+}
+
+static struct notifier_block lockd_inetaddr_notifier = {
+ .notifier_call = lockd_inetaddr_event,
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int lockd_inet6addr_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{ /* IPv6 counterpart of lockd_inetaddr_event(): reacts only to NETDEV_DOWN */
+ struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
+ struct sockaddr_in6 sin6; /* only sin6_family and sin6_addr are assigned below */
+
+ if (event != NETDEV_DOWN) /* every other event is ignored */
+ goto out;
+
+ if (nlmsvc_rqst) { /* skipped when nlmsvc_rqst is NULL */
+ dprintk("lockd_inet6addr_event: removed %pI6\n", &ifa->addr);
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_addr = ifa->addr;
+ svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,
+ (struct sockaddr *)&sin6); /* presumably ages out temporary transports using this address - confirm in sunrpc */
+ }
+
+out:
+ return NOTIFY_DONE; /* same return value on every path */
+}
+
+static struct notifier_block lockd_inet6addr_notifier = {
+ .notifier_call = lockd_inet6addr_event,
+};
+#endif
+
+static void lockd_svc_exit_thread(void)
+{ /* unregister the address notifiers, then call svc_exit_thread() on nlmsvc_rqst */
+ unregister_inetaddr_notifier(&lockd_inetaddr_notifier);
+#if IS_ENABLED(CONFIG_IPV6)
+ unregister_inet6addr_notifier(&lockd_inet6addr_notifier);
+#endif
+ svc_exit_thread(nlmsvc_rqst); /* callers reset nlmsvc_rqst to NULL afterwards */
+}
+
static int lockd_start_svc(struct svc_serv *serv)
{
int error;
@@ -315,7 +380,7 @@ static int lockd_start_svc(struct svc_serv *serv)
return 0;
out_task:
- svc_exit_thread(nlmsvc_rqst);
+ lockd_svc_exit_thread();
nlmsvc_task = NULL;
out_rqst:
nlmsvc_rqst = NULL;
@@ -360,6 +425,10 @@ static struct svc_serv *lockd_create_svc(void)
printk(KERN_WARNING "lockd_up: create service failed\n");
return ERR_PTR(-ENOMEM);
}
+ register_inetaddr_notifier(&lockd_inetaddr_notifier);
+#if IS_ENABLED(CONFIG_IPV6)
+ register_inet6addr_notifier(&lockd_inet6addr_notifier);
+#endif
dprintk("lockd_up: service created\n");
return serv;
}
@@ -428,7 +497,7 @@ lockd_down(struct net *net)
}
kthread_stop(nlmsvc_task);
dprintk("lockd_down: service stopped\n");
- svc_exit_thread(nlmsvc_rqst);
+ lockd_svc_exit_thread();
dprintk("lockd_down: service destroyed\n");
nlmsvc_task = NULL;
nlmsvc_rqst = NULL;
diff --git a/fs/locks.c b/fs/locks.c
index 6333263b7..7c5f91be9 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -119,7 +119,6 @@
#include <linux/fdtable.h>
#include <linux/fs.h>
#include <linux/init.h>
-#include <linux/module.h>
#include <linux/security.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
@@ -230,16 +229,44 @@ locks_get_lock_context(struct inode *inode, int type)
ctx = smp_load_acquire(&inode->i_flctx);
}
out:
+ trace_locks_get_lock_context(inode, type, ctx);
return ctx;
}
+static void
+locks_dump_ctx_list(struct list_head *list, char *list_type)
+{ /* emit one pr_warn() line per file_lock on @list, prefixed with @list_type */
+ struct file_lock *fl;
+
+ list_for_each_entry(fl, list, fl_list) { /* entries are linked through fl_list */
+ pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
+ }
+}
+
+static void
+locks_check_ctx_lists(struct inode *inode)
+{ /* warn and dump every lock list if any lock is still attached to @inode */
+ struct file_lock_context *ctx = inode->i_flctx; /* dereferenced unconditionally: must be non-NULL */
+
+ if (unlikely(!list_empty(&ctx->flc_flock) ||
+ !list_empty(&ctx->flc_posix) ||
+ !list_empty(&ctx->flc_lease))) { /* any non-empty list counts as a leak */
+ pr_warn("Leaked locks on dev=0x%x:0x%x ino=0x%lx:\n",
+ MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
+ inode->i_ino);
+ locks_dump_ctx_list(&ctx->flc_flock, "FLOCK");
+ locks_dump_ctx_list(&ctx->flc_posix, "POSIX");
+ locks_dump_ctx_list(&ctx->flc_lease, "LEASE");
+ }
+}
+
void
-locks_free_lock_context(struct file_lock_context *ctx)
+locks_free_lock_context(struct inode *inode)
{
- if (ctx) {
- WARN_ON_ONCE(!list_empty(&ctx->flc_flock));
- WARN_ON_ONCE(!list_empty(&ctx->flc_posix));
- WARN_ON_ONCE(!list_empty(&ctx->flc_lease));
+ struct file_lock_context *ctx = inode->i_flctx; /* now takes the inode so leak reports can name dev and ino */
+
+ if (unlikely(ctx)) { /* inodes without a lock context need no work */
+ locks_check_ctx_lists(inode); /* replaces the three WARN_ON_ONCE() checks with a full dump */
kmem_cache_free(flctx_cache, ctx);
}
}
@@ -934,7 +961,8 @@ out:
return error;
}
-static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
+static int posix_lock_inode(struct inode *inode, struct file_lock *request,
+ struct file_lock *conflock)
{
struct file_lock *fl, *tmp;
struct file_lock *new_fl = NULL;
@@ -1142,6 +1170,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
if (new_fl2)
locks_free_lock(new_fl2);
locks_dispose_list(&dispose);
+ trace_posix_lock_inode(inode, request, error);
+
return error;
}
@@ -1162,7 +1192,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
int posix_lock_file(struct file *filp, struct file_lock *fl,
struct file_lock *conflock)
{
- return __posix_lock_file(file_inode(filp), fl, conflock);
+ return posix_lock_inode(file_inode(filp), fl, conflock);
}
EXPORT_SYMBOL(posix_lock_file);
@@ -1178,7 +1208,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
int error;
might_sleep ();
for (;;) {
- error = __posix_lock_file(inode, fl, NULL);
+ error = posix_lock_inode(inode, fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
@@ -1191,6 +1221,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
return error;
}
+#ifdef CONFIG_MANDATORY_FILE_LOCKING
/**
* locks_mandatory_locked - Check for an active lock
* @file: the file to check
@@ -1227,20 +1258,16 @@ int locks_mandatory_locked(struct file *file)
/**
* locks_mandatory_area - Check for a conflicting lock
- * @read_write: %FLOCK_VERIFY_WRITE for exclusive access, %FLOCK_VERIFY_READ
- * for shared
- * @inode: the file to check
+ * @inode: the file to check
* @filp: how the file was opened (if it was)
- * @offset: start of area to check
- * @count: length of area to check
+ * @start: first byte in the file to check
+ * @end: last byte in the file to check
+ * @type: %F_WRLCK for a write lock, else %F_RDLCK
*
* Searches the inode's list of locks to find any POSIX locks which conflict.
- * This function is called from rw_verify_area() and
- * locks_verify_truncate().
*/
-int locks_mandatory_area(int read_write, struct inode *inode,
- struct file *filp, loff_t offset,
- size_t count)
+int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start,
+ loff_t end, unsigned char type)
{
struct file_lock fl;
int error;
@@ -1252,15 +1279,15 @@ int locks_mandatory_area(int read_write, struct inode *inode,
fl.fl_flags = FL_POSIX | FL_ACCESS;
if (filp && !(filp->f_flags & O_NONBLOCK))
sleep = true;
- fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK;
- fl.fl_start = offset;
- fl.fl_end = offset + count - 1;
+ fl.fl_type = type;
+ fl.fl_start = start;
+ fl.fl_end = end;
for (;;) {
if (filp) {
fl.fl_owner = filp;
fl.fl_flags &= ~FL_SLEEP;
- error = __posix_lock_file(inode, &fl, NULL);
+ error = posix_lock_inode(inode, &fl, NULL);
if (!error)
break;
}
@@ -1268,7 +1295,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
if (sleep)
fl.fl_flags |= FL_SLEEP;
fl.fl_owner = current->files;
- error = __posix_lock_file(inode, &fl, NULL);
+ error = posix_lock_inode(inode, &fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
@@ -1289,6 +1316,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
}
EXPORT_SYMBOL(locks_mandatory_area);
+#endif /* CONFIG_MANDATORY_FILE_LOCKING */
static void lease_clear_pending(struct file_lock *fl, int arg)
{
@@ -1503,12 +1531,10 @@ void lease_get_mtime(struct inode *inode, struct timespec *time)
ctx = smp_load_acquire(&inode->i_flctx);
if (ctx && !list_empty_careful(&ctx->flc_lease)) {
spin_lock(&ctx->flc_lock);
- if (!list_empty(&ctx->flc_lease)) {
- fl = list_first_entry(&ctx->flc_lease,
- struct file_lock, fl_list);
- if (fl->fl_type == F_WRLCK)
- has_lease = true;
- }
+ fl = list_first_entry_or_null(&ctx->flc_lease,
+ struct file_lock, fl_list);
+ if (fl && (fl->fl_type == F_WRLCK))
+ has_lease = true;
spin_unlock(&ctx->flc_lock);
}
@@ -1624,12 +1650,12 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
* bother, maybe that's a sign this just isn't a good file to
* hand out a delegation on.
*/
- if (is_deleg && !mutex_trylock(&inode->i_mutex))
+ if (is_deleg && !inode_trylock(inode))
return -EAGAIN;
if (is_deleg && arg == F_WRLCK) {
/* Write delegations are not currently supported: */
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
WARN_ON_ONCE(1);
return -EINVAL;
}
@@ -1706,7 +1732,7 @@ out:
spin_unlock(&ctx->flc_lock);
locks_dispose_list(&dispose);
if (is_deleg)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (!error && !my_fl)
*flp = NULL;
return error;
@@ -2165,6 +2191,8 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
if (file_lock == NULL)
return -ENOLCK;
+ inode = file_inode(filp);
+
/*
* This might block, so we do it before checking the inode.
*/
@@ -2172,8 +2200,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
if (copy_from_user(&flock, l, sizeof(flock)))
goto out;
- inode = file_inode(filp);
-
/* Don't allow mandatory locks on files that may be memory mapped
* and shared.
*/
@@ -2220,10 +2246,12 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
error = do_lock_file_wait(filp, cmd, file_lock);
/*
- * Attempt to detect a close/fcntl race and recover by
- * releasing the lock that was just acquired.
+ * Attempt to detect a close/fcntl race and recover by releasing the
+ * lock that was just acquired. There is no need to do that when we're
+ * unlocking though, or for OFD locks.
*/
- if (!error && file_lock->fl_type != F_UNLCK) {
+ if (!error && file_lock->fl_type != F_UNLCK &&
+ !(file_lock->fl_flags & FL_OFDLCK)) {
/*
* We need that spin_lock here - it prevents reordering between
* update of i_flctx->flc_posix and check for it done in
@@ -2240,6 +2268,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
}
}
out:
+ trace_fcntl_setlk(inode, file_lock, error);
locks_free_lock(file_lock);
return error;
}
@@ -2362,10 +2391,12 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
error = do_lock_file_wait(filp, cmd, file_lock);
/*
- * Attempt to detect a close/fcntl race and recover by
- * releasing the lock that was just acquired.
+ * Attempt to detect a close/fcntl race and recover by releasing the
+ * lock that was just acquired. There is no need to do that when we're
+ * unlocking though, or for OFD locks.
*/
- if (!error && file_lock->fl_type != F_UNLCK) {
+ if (!error && file_lock->fl_type != F_UNLCK &&
+ !(file_lock->fl_flags & FL_OFDLCK)) {
/*
* We need that spin_lock here - it prevents reordering between
* update of i_flctx->flc_posix and check for it done in
@@ -2394,6 +2425,7 @@ out:
*/
void locks_remove_posix(struct file *filp, fl_owner_t owner)
{
+ int error;
struct file_lock lock;
struct file_lock_context *ctx;
@@ -2416,10 +2448,11 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
lock.fl_ops = NULL;
lock.fl_lmops = NULL;
- vfs_lock_file(filp, F_SETLK, &lock, NULL);
+ error = vfs_lock_file(filp, F_SETLK, &lock, NULL);
if (lock.fl_ops && lock.fl_ops->fl_release_private)
lock.fl_ops->fl_release_private(&lock);
+ trace_locks_remove_posix(file_inode(filp), &lock, error);
}
EXPORT_SYMBOL(locks_remove_posix);
@@ -2715,7 +2748,7 @@ static int __init proc_locks_init(void)
proc_create("locks", 0, NULL, &proc_locks_operations);
return 0;
}
-module_init(proc_locks_init);
+fs_initcall(proc_locks_init);
#endif
static int __init filelock_init(void)
diff --git a/fs/logfs/Kconfig b/fs/logfs/Kconfig
index 09ed066c0..2b4503163 100644
--- a/fs/logfs/Kconfig
+++ b/fs/logfs/Kconfig
@@ -1,6 +1,6 @@
config LOGFS
tristate "LogFS file system"
- depends on (MTD || BLOCK)
+ depends on MTD || (!MTD && BLOCK)
select ZLIB_INFLATE
select ZLIB_DEFLATE
select CRC32
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index f9b45d46d..542468e9b 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -528,7 +528,8 @@ static int logfs_symlink(struct inode *dir, struct dentry *dentry,
if (IS_ERR(inode))
return PTR_ERR(inode);
- inode->i_op = &logfs_symlink_iops;
+ inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &logfs_reg_aops;
return __logfs_create(dir, dentry, inode, target, destlen);
@@ -776,12 +777,6 @@ fail:
return -EIO;
}
-const struct inode_operations logfs_symlink_iops = {
- .readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
-};
-
const struct inode_operations logfs_dir_iops = {
.create = logfs_create,
.link = logfs_link,
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index 1a6f0167b..61eaeb1b6 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -204,12 +204,12 @@ long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
if (err)
return err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
oldflags = li->li_flags;
flags &= LOGFS_FL_USER_MODIFIABLE;
flags |= oldflags & ~LOGFS_FL_USER_MODIFIABLE;
li->li_flags = flags;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
inode->i_ctime = CURRENT_TIME;
mark_inode_dirty_sync(inode);
@@ -230,11 +230,11 @@ int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
logfs_get_wblocks(sb, NULL, WF_LOCK);
logfs_write_anchor(sb);
logfs_put_wblocks(sb, NULL, WF_LOCK);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return 0;
}
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index af49e2d69..db9cfc598 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -64,7 +64,8 @@ static void logfs_inode_setops(struct inode *inode)
inode->i_mapping->a_ops = &logfs_reg_aops;
break;
case S_IFLNK:
- inode->i_op = &logfs_symlink_iops;
+ inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &logfs_reg_aops;
break;
case S_IFSOCK: /* fall through */
@@ -408,7 +409,8 @@ const struct super_operations logfs_super_operations = {
int logfs_init_inode_cache(void)
{
logfs_inode_cache = kmem_cache_create("logfs_inode_cache",
- sizeof(struct logfs_inode), 0, SLAB_RECLAIM_ACCOUNT,
+ sizeof(struct logfs_inode), 0,
+ SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT,
logfs_init_once);
if (!logfs_inode_cache)
return -ENOMEM;
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 5f0937609..27d040e35 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -302,7 +302,7 @@ struct logfs_block {
struct inode *inode;
struct logfs_transaction *ta;
unsigned long alias_map[LOGFS_BLOCK_FACTOR / BITS_PER_LONG];
- struct logfs_block_ops *ops;
+ const struct logfs_block_ops *ops;
int full;
int partial;
int reserved_bytes;
@@ -485,7 +485,7 @@ static inline int logfs_get_sb_bdev(struct logfs_super *s,
#endif
/* dev_mtd.c */
-#ifdef CONFIG_MTD
+#if IS_ENABLED(CONFIG_MTD)
int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr);
#else
static inline int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
@@ -495,7 +495,6 @@ static inline int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
#endif
/* dir.c */
-extern const struct inode_operations logfs_symlink_iops;
extern const struct inode_operations logfs_dir_iops;
extern const struct file_operations logfs_dir_fops;
int logfs_replay_journal(struct super_block *sb);
@@ -579,7 +578,7 @@ int logfs_exist_block(struct inode *inode, u64 bix);
int get_page_reserve(struct inode *inode, struct page *page);
void logfs_get_wblocks(struct super_block *sb, struct page *page, int lock);
void logfs_put_wblocks(struct super_block *sb, struct page *page, int lock);
-extern struct logfs_block_ops indirect_block_ops;
+extern const struct logfs_block_ops indirect_block_ops;
/* segment.c */
int logfs_erase_segment(struct super_block *sb, u32 ofs, int ensure_erase);
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 380d86e1a..20973c9e5 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -569,13 +569,13 @@ static void indirect_free_block(struct super_block *sb,
}
-static struct logfs_block_ops inode_block_ops = {
+static const struct logfs_block_ops inode_block_ops = {
.write_block = inode_write_block,
.free_block = inode_free_block,
.write_alias = inode_write_alias,
};
-struct logfs_block_ops indirect_block_ops = {
+const struct logfs_block_ops indirect_block_ops = {
.write_block = indirect_write_block,
.free_block = indirect_free_block,
.write_alias = indirect_write_alias,
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index 6de0fbfc6..d270e4b2a 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -197,7 +197,7 @@ static int btree_write_alias(struct super_block *sb, struct logfs_block *block,
return 0;
}
-static struct logfs_block_ops btree_block_ops = {
+static const struct logfs_block_ops btree_block_ops = {
.write_block = btree_write_block,
.free_block = __free_block,
.write_alias = btree_write_alias,
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 086cd0a61..f975d667c 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -91,7 +91,7 @@ static int __init init_inodecache(void)
minix_inode_cachep = kmem_cache_create("minix_inode_cache",
sizeof(struct minix_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
if (minix_inode_cachep == NULL)
return -ENOMEM;
@@ -435,8 +435,7 @@ static const struct address_space_operations minix_aops = {
static const struct inode_operations minix_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.getattr = minix_getattr,
};
@@ -452,6 +451,7 @@ void minix_set_inode(struct inode *inode, dev_t rdev)
inode->i_mapping->a_ops = &minix_aops;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &minix_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &minix_aops;
} else
init_special_inode(inode, inode->i_mode, rdev);
diff --git a/fs/minix/itree_v1.c b/fs/minix/itree_v1.c
index 282e15ad8..46ca39d6c 100644
--- a/fs/minix/itree_v1.c
+++ b/fs/minix/itree_v1.c
@@ -24,16 +24,15 @@ static inline block_t *i_data(struct inode *inode)
static int block_to_path(struct inode * inode, long block, int offsets[DEPTH])
{
int n = 0;
- char b[BDEVNAME_SIZE];
if (block < 0) {
- printk("MINIX-fs: block_to_path: block %ld < 0 on dev %s\n",
- block, bdevname(inode->i_sb->s_bdev, b));
+ printk("MINIX-fs: block_to_path: block %ld < 0 on dev %pg\n",
+ block, inode->i_sb->s_bdev);
} else if (block >= (minix_sb(inode->i_sb)->s_max_size/BLOCK_SIZE)) {
if (printk_ratelimit())
printk("MINIX-fs: block_to_path: "
- "block %ld too big on dev %s\n",
- block, bdevname(inode->i_sb->s_bdev, b));
+ "block %ld too big on dev %pg\n",
+ block, inode->i_sb->s_bdev);
} else if (block < 7) {
offsets[n++] = block;
} else if ((block -= 7) < 512) {
diff --git a/fs/minix/itree_v2.c b/fs/minix/itree_v2.c
index 78e2d93e5..1ee101352 100644
--- a/fs/minix/itree_v2.c
+++ b/fs/minix/itree_v2.c
@@ -26,18 +26,17 @@ static inline block_t *i_data(struct inode *inode)
static int block_to_path(struct inode * inode, long block, int offsets[DEPTH])
{
int n = 0;
- char b[BDEVNAME_SIZE];
struct super_block *sb = inode->i_sb;
if (block < 0) {
- printk("MINIX-fs: block_to_path: block %ld < 0 on dev %s\n",
- block, bdevname(sb->s_bdev, b));
+ printk("MINIX-fs: block_to_path: block %ld < 0 on dev %pg\n",
+ block, sb->s_bdev);
} else if ((u64)block * (u64)sb->s_blocksize >=
minix_sb(sb)->s_max_size) {
if (printk_ratelimit())
printk("MINIX-fs: block_to_path: "
- "block %ld too big on dev %s\n",
- block, bdevname(sb->s_bdev, b));
+ "block %ld too big on dev %pg\n",
+ block, sb->s_bdev);
} else if (block < DIRCOUNT) {
offsets[n++] = block;
} else if ((block -= DIRCOUNT) < INDIRCOUNT(sb)) {
diff --git a/fs/namei.c b/fs/namei.c
index d8ee4da93..9c590e0f6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -505,13 +505,13 @@ struct nameidata {
int total_link_count;
struct saved {
struct path link;
- void *cookie;
+ struct delayed_call done;
const char *name;
- struct inode *inode;
unsigned seq;
} *stack, internal[EMBEDDED_LEVELS];
struct filename *name;
struct nameidata *saved;
+ struct inode *link_inode;
unsigned root_seq;
int dfd;
};
@@ -534,10 +534,8 @@ static void restore_nameidata(void)
current->nameidata = old;
if (old)
old->total_link_count = now->total_link_count;
- if (now->stack != now->internal) {
+ if (now->stack != now->internal)
kfree(now->stack);
- now->stack = now->internal;
- }
}
static int __nd_alloc_stack(struct nameidata *nd)
@@ -592,11 +590,8 @@ static void drop_links(struct nameidata *nd)
int i = nd->depth;
while (i--) {
struct saved *last = nd->stack + i;
- struct inode *inode = last->inode;
- if (last->cookie && inode->i_op->put_link) {
- inode->i_op->put_link(inode, last->cookie);
- last->cookie = NULL;
- }
+ do_delayed_call(&last->done);
+ clear_delayed_call(&last->done);
}
}
@@ -657,7 +652,7 @@ static bool legitimize_links(struct nameidata *nd)
* Path walking has 2 modes, rcu-walk and ref-walk (see
* Documentation/filesystems/path-lookup.txt). In situations when we can't
* continue in RCU mode, we attempt to drop out of rcu-walk mode and grab
- * normal reference counts on dentries and vfsmounts to transition to rcu-walk
+ * normal reference counts on dentries and vfsmounts to transition to ref-walk
* mode. Refcounts are grabbed at the last known good point before rcu-walk
* got stuck, so ref-walk may continue from there. If this is not successful
* (eg. a seqcount has changed), then failure is returned and it's up to caller
@@ -807,19 +802,19 @@ static int complete_walk(struct nameidata *nd)
static void set_root(struct nameidata *nd)
{
- get_fs_root(current->fs, &nd->root);
-}
-
-static void set_root_rcu(struct nameidata *nd)
-{
struct fs_struct *fs = current->fs;
- unsigned seq;
- do {
- seq = read_seqcount_begin(&fs->seq);
- nd->root = fs->root;
- nd->root_seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
- } while (read_seqcount_retry(&fs->seq, seq));
+ if (nd->flags & LOOKUP_RCU) {
+ unsigned seq;
+
+ do {
+ seq = read_seqcount_begin(&fs->seq);
+ nd->root = fs->root;
+ nd->root_seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
+ } while (read_seqcount_retry(&fs->seq, seq));
+ } else {
+ get_fs_root(fs, &nd->root);
+ }
}
static void path_put_conditional(struct path *path, struct nameidata *nd)
@@ -841,8 +836,28 @@ static inline void path_to_nameidata(const struct path *path,
nd->path.dentry = path->dentry;
}
+static int nd_jump_root(struct nameidata *nd)
+{
+ if (nd->flags & LOOKUP_RCU) {
+ struct dentry *d;
+ nd->path = nd->root;
+ d = nd->path.dentry;
+ nd->inode = d->d_inode;
+ nd->seq = nd->root_seq;
+ if (unlikely(read_seqcount_retry(&d->d_seq, nd->seq)))
+ return -ECHILD;
+ } else {
+ path_put(&nd->path);
+ nd->path = nd->root;
+ path_get(&nd->path);
+ nd->inode = nd->path.dentry->d_inode;
+ }
+ nd->flags |= LOOKUP_JUMPED;
+ return 0;
+}
+
/*
- * Helper to directly jump to a known parsed path from ->follow_link,
+ * Helper to directly jump to a known parsed path from ->get_link,
* caller must have taken a reference to path beforehand.
*/
void nd_jump_link(struct path *path)
@@ -858,9 +873,7 @@ void nd_jump_link(struct path *path)
static inline void put_link(struct nameidata *nd)
{
struct saved *last = nd->stack + --nd->depth;
- struct inode *inode = last->inode;
- if (last->cookie && inode->i_op->put_link)
- inode->i_op->put_link(inode, last->cookie);
+ do_delayed_call(&last->done);
if (!(nd->flags & LOOKUP_RCU))
path_put(&last->link);
}
@@ -892,7 +905,7 @@ static inline int may_follow_link(struct nameidata *nd)
return 0;
/* Allowed if owner and follower match. */
- inode = nd->stack[0].inode;
+ inode = nd->link_inode;
if (uid_eq(current_cred()->fsuid, inode->i_uid))
return 0;
@@ -983,7 +996,7 @@ const char *get_link(struct nameidata *nd)
{
struct saved *last = nd->stack + nd->depth - 1;
struct dentry *dentry = last->link.dentry;
- struct inode *inode = last->inode;
+ struct inode *inode = nd->link_inode;
int error;
const char *res;
@@ -1004,36 +1017,27 @@ const char *get_link(struct nameidata *nd)
nd->last_type = LAST_BIND;
res = inode->i_link;
if (!res) {
+ const char * (*get)(struct dentry *, struct inode *,
+ struct delayed_call *);
+ get = inode->i_op->get_link;
if (nd->flags & LOOKUP_RCU) {
- if (unlikely(unlazy_walk(nd, NULL, 0)))
- return ERR_PTR(-ECHILD);
+ res = get(NULL, inode, &last->done);
+ if (res == ERR_PTR(-ECHILD)) {
+ if (unlikely(unlazy_walk(nd, NULL, 0)))
+ return ERR_PTR(-ECHILD);
+ res = get(dentry, inode, &last->done);
+ }
+ } else {
+ res = get(dentry, inode, &last->done);
}
- res = inode->i_op->follow_link(dentry, &last->cookie);
- if (IS_ERR_OR_NULL(res)) {
- last->cookie = NULL;
+ if (IS_ERR_OR_NULL(res))
return res;
- }
}
if (*res == '/') {
- if (nd->flags & LOOKUP_RCU) {
- struct dentry *d;
- if (!nd->root.mnt)
- set_root_rcu(nd);
- nd->path = nd->root;
- d = nd->path.dentry;
- nd->inode = d->d_inode;
- nd->seq = nd->root_seq;
- if (unlikely(read_seqcount_retry(&d->d_seq, nd->seq)))
- return ERR_PTR(-ECHILD);
- } else {
- if (!nd->root.mnt)
- set_root(nd);
- path_put(&nd->path);
- nd->path = nd->root;
- path_get(&nd->root);
- nd->inode = nd->path.dentry->d_inode;
- }
- nd->flags |= LOOKUP_JUMPED;
+ if (!nd->root.mnt)
+ set_root(nd);
+ if (unlikely(nd_jump_root(nd)))
+ return ERR_PTR(-ECHILD);
while (unlikely(*++res == '/'))
;
}
@@ -1294,8 +1298,6 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
static int follow_dotdot_rcu(struct nameidata *nd)
{
struct inode *inode = nd->inode;
- if (!nd->root.mnt)
- set_root_rcu(nd);
while (1) {
if (path_equal(&nd->path, &nd->root))
@@ -1415,9 +1417,6 @@ static void follow_mount(struct path *path)
static int follow_dotdot(struct nameidata *nd)
{
- if (!nd->root.mnt)
- set_root(nd);
-
while(1) {
struct dentry *old = nd->path.dentry;
@@ -1630,9 +1629,9 @@ static int lookup_slow(struct nameidata *nd, struct path *path)
parent = nd->path.dentry;
BUG_ON(nd->inode != parent->d_inode);
- mutex_lock(&parent->d_inode->i_mutex);
+ inode_lock(parent->d_inode);
dentry = __lookup_hash(&nd->last, parent, nd->flags);
- mutex_unlock(&parent->d_inode->i_mutex);
+ inode_unlock(parent->d_inode);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
path->mnt = nd->path.mnt;
@@ -1655,6 +1654,8 @@ static inline int may_lookup(struct nameidata *nd)
static inline int handle_dots(struct nameidata *nd, int type)
{
if (type == LAST_DOTDOT) {
+ if (!nd->root.mnt)
+ set_root(nd);
if (nd->flags & LOOKUP_RCU) {
return follow_dotdot_rcu(nd);
} else
@@ -1691,8 +1692,8 @@ static int pick_link(struct nameidata *nd, struct path *link,
last = nd->stack + nd->depth++;
last->link = *link;
- last->cookie = NULL;
- last->inode = inode;
+ clear_delayed_call(&last->done);
+ nd->link_inode = inode;
last->seq = seq;
return 1;
}
@@ -2025,18 +2026,19 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
}
nd->root.mnt = NULL;
+ nd->path.mnt = NULL;
+ nd->path.dentry = NULL;
nd->m_seq = read_seqbegin(&mount_lock);
if (*s == '/') {
- if (flags & LOOKUP_RCU) {
+ if (flags & LOOKUP_RCU)
rcu_read_lock();
- set_root_rcu(nd);
- nd->seq = nd->root_seq;
- } else {
- set_root(nd);
- path_get(&nd->root);
- }
- nd->path = nd->root;
+ set_root(nd);
+ if (likely(!nd_jump_root(nd)))
+ return s;
+ nd->root.mnt = NULL;
+ rcu_read_unlock();
+ return ERR_PTR(-ECHILD);
} else if (nd->dfd == AT_FDCWD) {
if (flags & LOOKUP_RCU) {
struct fs_struct *fs = current->fs;
@@ -2047,11 +2049,14 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
do {
seq = read_seqcount_begin(&fs->seq);
nd->path = fs->pwd;
+ nd->inode = nd->path.dentry->d_inode;
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
} while (read_seqcount_retry(&fs->seq, seq));
} else {
get_fs_pwd(current->fs, &nd->path);
+ nd->inode = nd->path.dentry->d_inode;
}
+ return s;
} else {
/* Caller must check execute permissions on the starting path component */
struct fd f = fdget_raw(nd->dfd);
@@ -2081,16 +2086,6 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
fdput(f);
return s;
}
-
- nd->inode = nd->path.dentry->d_inode;
- if (!(flags & LOOKUP_RCU))
- return s;
- if (likely(!read_seqcount_retry(&nd->path.dentry->d_seq, nd->seq)))
- return s;
- if (!(nd->flags & LOOKUP_ROOT))
- nd->root.mnt = NULL;
- rcu_read_unlock();
- return ERR_PTR(-ECHILD);
}
static const char *trailing_symlink(struct nameidata *nd)
@@ -2239,10 +2234,10 @@ struct dentry *kern_path_locked(const char *name, struct path *path)
putname(filename);
return ERR_PTR(-EINVAL);
}
- mutex_lock_nested(&path->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
d = __lookup_hash(&last, path->dentry, 0);
if (IS_ERR(d)) {
- mutex_unlock(&path->dentry->d_inode->i_mutex);
+ inode_unlock(path->dentry->d_inode);
path_put(path);
}
putname(filename);
@@ -2283,6 +2278,8 @@ EXPORT_SYMBOL(vfs_path_lookup);
*
* Note that this routine is purely a helper for filesystem usage and should
* not be called by generic code.
+ *
+ * The caller must hold base->i_mutex.
*/
struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
{
@@ -2290,7 +2287,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
unsigned int c;
int err;
- WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
+ WARN_ON_ONCE(!inode_is_locked(base->d_inode));
this.name = name;
this.len = len;
@@ -2326,6 +2323,75 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
}
EXPORT_SYMBOL(lookup_one_len);
+/**
+ * lookup_one_len_unlocked - filesystem helper to lookup single pathname component
+ * @name: pathname component to lookup
+ * @base: base directory to lookup from
+ * @len: maximum length @len should be interpreted to
+ *
+ * Note that this routine is purely a helper for filesystem usage and should
+ * not be called by generic code.
+ *
+ * Unlike lookup_one_len, it should be called without the parent
+ * i_mutex held, and will take the i_mutex itself if necessary.
+ */
+struct dentry *lookup_one_len_unlocked(const char *name,
+ struct dentry *base, int len)
+{
+ struct qstr this;
+ unsigned int c;
+ int err;
+ struct dentry *ret;
+
+ this.name = name;
+ this.len = len;
+ this.hash = full_name_hash(name, len);
+ if (!len)
+ return ERR_PTR(-EACCES);
+
+ if (unlikely(name[0] == '.')) {
+ if (len < 2 || (len == 2 && name[1] == '.'))
+ return ERR_PTR(-EACCES);
+ }
+
+ while (len--) {
+ c = *(const unsigned char *)name++;
+ if (c == '/' || c == '\0')
+ return ERR_PTR(-EACCES);
+ }
+ /*
+ * See if the low-level filesystem might want
+ * to use its own hash..
+ */
+ if (base->d_flags & DCACHE_OP_HASH) {
+ int err = base->d_op->d_hash(base, &this);
+ if (err < 0)
+ return ERR_PTR(err);
+ }
+
+ err = inode_permission(base->d_inode, MAY_EXEC);
+ if (err)
+ return ERR_PTR(err);
+
+ /*
+ * __d_lookup() is used to try to get a quick answer and avoid the
+ * mutex. A false-negative does no harm.
+ */
+ ret = __d_lookup(base, &this);
+ if (ret && unlikely(ret->d_flags & DCACHE_OP_REVALIDATE)) {
+ dput(ret);
+ ret = NULL;
+ }
+ if (ret)
+ return ret;
+
+ inode_lock(base->d_inode);
+ ret = __lookup_hash(&this, base, 0);
+ inode_unlock(base->d_inode);
+ return ret;
+}
+EXPORT_SYMBOL(lookup_one_len_unlocked);
+
int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
struct path *path, int *empty)
{
@@ -2402,7 +2468,7 @@ mountpoint_last(struct nameidata *nd, struct path *path)
goto done;
}
- mutex_lock(&dir->d_inode->i_mutex);
+ inode_lock(dir->d_inode);
dentry = d_lookup(dir, &nd->last);
if (!dentry) {
/*
@@ -2412,16 +2478,16 @@ mountpoint_last(struct nameidata *nd, struct path *path)
*/
dentry = d_alloc(dir, &nd->last);
if (!dentry) {
- mutex_unlock(&dir->d_inode->i_mutex);
+ inode_unlock(dir->d_inode);
return -ENOMEM;
}
dentry = lookup_real(dir->d_inode, dentry, nd->flags);
if (IS_ERR(dentry)) {
- mutex_unlock(&dir->d_inode->i_mutex);
+ inode_unlock(dir->d_inode);
return PTR_ERR(dentry);
}
}
- mutex_unlock(&dir->d_inode->i_mutex);
+ inode_unlock(dir->d_inode);
done:
if (d_is_negative(dentry)) {
@@ -2611,7 +2677,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
struct dentry *p;
if (p1 == p2) {
- mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
return NULL;
}
@@ -2619,29 +2685,29 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
p = d_ancestor(p2, p1);
if (p) {
- mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
+ inode_lock_nested(p1->d_inode, I_MUTEX_CHILD);
return p;
}
p = d_ancestor(p1, p2);
if (p) {
- mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+ inode_lock_nested(p2->d_inode, I_MUTEX_CHILD);
return p;
}
- mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT2);
+ inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+ inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
return NULL;
}
EXPORT_SYMBOL(lock_rename);
void unlock_rename(struct dentry *p1, struct dentry *p2)
{
- mutex_unlock(&p1->d_inode->i_mutex);
+ inode_unlock(p1->d_inode);
if (p1 != p2) {
- mutex_unlock(&p2->d_inode->i_mutex);
+ inode_unlock(p2->d_inode);
mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
}
}
@@ -2674,10 +2740,6 @@ static int may_open(struct path *path, int acc_mode, int flag)
struct inode *inode = dentry->d_inode;
int error;
- /* O_PATH? */
- if (!acc_mode)
- return 0;
-
if (!inode)
return -ENOENT;
@@ -2699,7 +2761,7 @@ static int may_open(struct path *path, int acc_mode, int flag)
break;
}
- error = inode_permission(inode, acc_mode);
+ error = inode_permission(inode, MAY_OPEN | acc_mode);
if (error)
return error;
@@ -2891,7 +2953,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
if (*opened & FILE_CREATED) {
WARN_ON(!(open_flag & O_CREAT));
fsnotify_create(dir, dentry);
- acc_mode = MAY_OPEN;
+ acc_mode = 0;
}
error = may_open(&file->f_path, acc_mode, open_flag);
if (error)
@@ -3084,9 +3146,9 @@ retry_lookup:
* dropping this one anyway.
*/
}
- mutex_lock(&dir->d_inode->i_mutex);
+ inode_lock(dir->d_inode);
error = lookup_open(nd, &path, file, op, got_write, opened);
- mutex_unlock(&dir->d_inode->i_mutex);
+ inode_unlock(dir->d_inode);
if (error <= 0) {
if (error)
@@ -3104,7 +3166,7 @@ retry_lookup:
/* Don't check for write permission, don't truncate */
open_flag &= ~O_TRUNC;
will_truncate = false;
- acc_mode = MAY_OPEN;
+ acc_mode = 0;
path_to_nameidata(&path, nd);
goto finish_open_created;
}
@@ -3187,10 +3249,11 @@ finish_open:
got_write = true;
}
finish_open_created:
- error = may_open(&nd->path, acc_mode, open_flag);
- if (error)
- goto out;
-
+ if (likely(!(open_flag & O_PATH))) {
+ error = may_open(&nd->path, acc_mode, open_flag);
+ if (error)
+ goto out;
+ }
BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
error = vfs_open(&nd->path, file, current_cred());
if (!error) {
@@ -3281,7 +3344,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
goto out2;
audit_inode(nd->name, child, 0);
/* Don't check for other permissions, the inode was just created */
- error = may_open(&path, MAY_OPEN, op->open_flag);
+ error = may_open(&path, 0, op->open_flag);
if (error)
goto out2;
file->f_path.mnt = path.mnt;
@@ -3434,7 +3497,7 @@ static struct dentry *filename_create(int dfd, struct filename *name,
* Do the final lookup.
*/
lookup_flags |= LOOKUP_CREATE | LOOKUP_EXCL;
- mutex_lock_nested(&path->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
dentry = __lookup_hash(&last, path->dentry, lookup_flags);
if (IS_ERR(dentry))
goto unlock;
@@ -3463,7 +3526,7 @@ fail:
dput(dentry);
dentry = ERR_PTR(error);
unlock:
- mutex_unlock(&path->dentry->d_inode->i_mutex);
+ inode_unlock(path->dentry->d_inode);
if (!err2)
mnt_drop_write(path->mnt);
out:
@@ -3483,7 +3546,7 @@ EXPORT_SYMBOL(kern_path_create);
void done_path_create(struct path *path, struct dentry *dentry)
{
dput(dentry);
- mutex_unlock(&path->dentry->d_inode->i_mutex);
+ inode_unlock(path->dentry->d_inode);
mnt_drop_write(path->mnt);
path_put(path);
}
@@ -3680,7 +3743,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
return -EPERM;
dget(dentry);
- mutex_lock(&dentry->d_inode->i_mutex);
+ inode_lock(dentry->d_inode);
error = -EBUSY;
if (is_local_mountpoint(dentry))
@@ -3700,7 +3763,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
detach_mounts(dentry);
out:
- mutex_unlock(&dentry->d_inode->i_mutex);
+ inode_unlock(dentry->d_inode);
dput(dentry);
if (!error)
d_delete(dentry);
@@ -3739,7 +3802,7 @@ retry:
if (error)
goto exit1;
- mutex_lock_nested(&path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
dentry = __lookup_hash(&last, path.dentry, lookup_flags);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
@@ -3755,7 +3818,7 @@ retry:
exit3:
dput(dentry);
exit2:
- mutex_unlock(&path.dentry->d_inode->i_mutex);
+ inode_unlock(path.dentry->d_inode);
mnt_drop_write(path.mnt);
exit1:
path_put(&path);
@@ -3801,7 +3864,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
if (!dir->i_op->unlink)
return -EPERM;
- mutex_lock(&target->i_mutex);
+ inode_lock(target);
if (is_local_mountpoint(dentry))
error = -EBUSY;
else {
@@ -3818,7 +3881,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
}
}
out:
- mutex_unlock(&target->i_mutex);
+ inode_unlock(target);
/* We don't d_delete() NFS sillyrenamed files--they still exist. */
if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
@@ -3861,7 +3924,7 @@ retry:
if (error)
goto exit1;
retry_deleg:
- mutex_lock_nested(&path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
dentry = __lookup_hash(&last, path.dentry, lookup_flags);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
@@ -3879,7 +3942,7 @@ retry_deleg:
exit2:
dput(dentry);
}
- mutex_unlock(&path.dentry->d_inode->i_mutex);
+ inode_unlock(path.dentry->d_inode);
if (inode)
iput(inode); /* truncate the inode here */
inode = NULL;
@@ -4031,7 +4094,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
if (error)
return error;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* Make sure we don't allow creating hardlink to an unlinked file */
if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
error = -ENOENT;
@@ -4048,7 +4111,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
inode->i_state &= ~I_LINKABLE;
spin_unlock(&inode->i_lock);
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (!error)
fsnotify_link(dir, inode, new_dentry);
return error;
@@ -4248,7 +4311,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (!is_dir || (flags & RENAME_EXCHANGE))
lock_two_nondirectories(source, target);
else if (target)
- mutex_lock(&target->i_mutex);
+ inode_lock(target);
error = -EBUSY;
if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry))
@@ -4301,7 +4364,7 @@ out:
if (!is_dir || (flags & RENAME_EXCHANGE))
unlock_two_nondirectories(source, target);
else if (target)
- mutex_unlock(&target->i_mutex);
+ inode_unlock(target);
dput(new_dentry);
if (!error) {
fsnotify_move(old_dir, new_dir, old_name, is_dir,
@@ -4503,72 +4566,73 @@ EXPORT_SYMBOL(readlink_copy);
/*
* A helper for ->readlink(). This should be used *ONLY* for symlinks that
- * have ->follow_link() touching nd only in nd_set_link(). Using (or not
- * using) it for any given inode is up to filesystem.
+ * have ->get_link() not calling nd_jump_link(). Using (or not using) it
+ * for any given inode is up to filesystem.
*/
int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
- void *cookie;
+ DEFINE_DELAYED_CALL(done);
struct inode *inode = d_inode(dentry);
const char *link = inode->i_link;
int res;
if (!link) {
- link = inode->i_op->follow_link(dentry, &cookie);
+ link = inode->i_op->get_link(dentry, inode, &done);
if (IS_ERR(link))
return PTR_ERR(link);
}
res = readlink_copy(buffer, buflen, link);
- if (inode->i_op->put_link)
- inode->i_op->put_link(inode, cookie);
+ do_delayed_call(&done);
return res;
}
EXPORT_SYMBOL(generic_readlink);
/* get the link contents into pagecache */
-static char *page_getlink(struct dentry * dentry, struct page **ppage)
+const char *page_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *callback)
{
char *kaddr;
struct page *page;
- struct address_space *mapping = dentry->d_inode->i_mapping;
- page = read_mapping_page(mapping, 0, NULL);
- if (IS_ERR(page))
- return (char*)page;
- *ppage = page;
- kaddr = kmap(page);
- nd_terminate_link(kaddr, dentry->d_inode->i_size, PAGE_SIZE - 1);
+ struct address_space *mapping = inode->i_mapping;
+
+ if (!dentry) {
+ page = find_get_page(mapping, 0);
+ if (!page)
+ return ERR_PTR(-ECHILD);
+ if (!PageUptodate(page)) {
+ put_page(page);
+ return ERR_PTR(-ECHILD);
+ }
+ } else {
+ page = read_mapping_page(mapping, 0, NULL);
+ if (IS_ERR(page))
+ return (char*)page;
+ }
+ set_delayed_call(callback, page_put_link, page);
+ BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM);
+ kaddr = page_address(page);
+ nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1);
return kaddr;
}
-int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
-{
- struct page *page = NULL;
- int res = readlink_copy(buffer, buflen, page_getlink(dentry, &page));
- if (page) {
- kunmap(page);
- page_cache_release(page);
- }
- return res;
-}
-EXPORT_SYMBOL(page_readlink);
+EXPORT_SYMBOL(page_get_link);
-const char *page_follow_link_light(struct dentry *dentry, void **cookie)
+void page_put_link(void *arg)
{
- struct page *page = NULL;
- char *res = page_getlink(dentry, &page);
- if (!IS_ERR(res))
- *cookie = page;
- return res;
+ put_page(arg);
}
-EXPORT_SYMBOL(page_follow_link_light);
+EXPORT_SYMBOL(page_put_link);
-void page_put_link(struct inode *unused, void *cookie)
+int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
- struct page *page = cookie;
- kunmap(page);
- page_cache_release(page);
+ DEFINE_DELAYED_CALL(done);
+ int res = readlink_copy(buffer, buflen,
+ page_get_link(dentry, d_inode(dentry),
+ &done));
+ do_delayed_call(&done);
+ return res;
}
-EXPORT_SYMBOL(page_put_link);
+EXPORT_SYMBOL(page_readlink);
/*
* The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS
@@ -4579,7 +4643,6 @@ int __page_symlink(struct inode *inode, const char *symname, int len, int nofs)
struct page *page;
void *fsdata;
int err;
- char *kaddr;
unsigned int flags = AOP_FLAG_UNINTERRUPTIBLE;
if (nofs)
flags |= AOP_FLAG_NOFS;
@@ -4590,9 +4653,7 @@ retry:
if (err)
goto fail;
- kaddr = kmap_atomic(page);
- memcpy(kaddr, symname, len-1);
- kunmap_atomic(kaddr);
+ memcpy(page_address(page), symname, len-1);
err = pagecache_write_end(NULL, mapping, 0, len-1, len-1,
page, fsdata);
@@ -4617,7 +4678,6 @@ EXPORT_SYMBOL(page_symlink);
const struct inode_operations page_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
};
EXPORT_SYMBOL(page_symlink_inode_operations);
diff --git a/fs/namespace.c b/fs/namespace.c
index fc5002819..4fb1691b4 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -463,7 +463,6 @@ void __mnt_drop_write(struct vfsmount *mnt)
mnt_dec_writers(real_mount(mnt));
preempt_enable();
}
-EXPORT_SYMBOL_GPL(__mnt_drop_write);
/**
* mnt_drop_write - give up write access to a mount
@@ -1585,6 +1584,14 @@ static inline bool may_mount(void)
return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
}
+static inline bool may_mandlock(void)
+{
+#ifndef CONFIG_MANDATORY_FILE_LOCKING
+ return false;
+#endif
+ return capable(CAP_SYS_ADMIN);
+}
+
/*
* Now umount can handle mount points as well as block devices.
* This is important for filesystems which use unnamed block devices.
@@ -1804,7 +1811,6 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
}
return 0;
}
-EXPORT_SYMBOL_GPL(iterate_mounts);
static void cleanup_group_ids(struct mount *mnt, struct mount *end)
{
@@ -1955,9 +1961,9 @@ static struct mountpoint *lock_mount(struct path *path)
struct vfsmount *mnt;
struct dentry *dentry = path->dentry;
retry:
- mutex_lock(&dentry->d_inode->i_mutex);
+ inode_lock(dentry->d_inode);
if (unlikely(cant_mount(dentry))) {
- mutex_unlock(&dentry->d_inode->i_mutex);
+ inode_unlock(dentry->d_inode);
return ERR_PTR(-ENOENT);
}
namespace_lock();
@@ -1968,13 +1974,13 @@ retry:
mp = new_mountpoint(dentry);
if (IS_ERR(mp)) {
namespace_unlock();
- mutex_unlock(&dentry->d_inode->i_mutex);
+ inode_unlock(dentry->d_inode);
return mp;
}
return mp;
}
namespace_unlock();
- mutex_unlock(&path->dentry->d_inode->i_mutex);
+ inode_unlock(path->dentry->d_inode);
path_put(path);
path->mnt = mnt;
dentry = path->dentry = dget(mnt->mnt_root);
@@ -1986,7 +1992,7 @@ static void unlock_mount(struct mountpoint *where)
struct dentry *dentry = where->m_dentry;
put_mountpoint(where);
namespace_unlock();
- mutex_unlock(&dentry->d_inode->i_mutex);
+ inode_unlock(dentry->d_inode);
}
static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
@@ -2603,18 +2609,18 @@ static long exact_copy_from_user(void *to, const void __user * from,
return n;
}
-int copy_mount_options(const void __user * data, unsigned long *where)
+void *copy_mount_options(const void __user * data)
{
int i;
- unsigned long page;
unsigned long size;
+ char *copy;
- *where = 0;
if (!data)
- return 0;
+ return NULL;
- if (!(page = __get_free_page(GFP_KERNEL)))
- return -ENOMEM;
+ copy = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!copy)
+ return ERR_PTR(-ENOMEM);
/* We only care that *some* data at the address the user
* gave us is valid. Just in case, we'll zero
@@ -2625,15 +2631,14 @@ int copy_mount_options(const void __user * data, unsigned long *where)
if (size > PAGE_SIZE)
size = PAGE_SIZE;
- i = size - exact_copy_from_user((void *)page, data, size);
+ i = size - exact_copy_from_user(copy, data, size);
if (!i) {
- free_page(page);
- return -EFAULT;
+ kfree(copy);
+ return ERR_PTR(-EFAULT);
}
if (i != PAGE_SIZE)
- memset((char *)page + i, 0, PAGE_SIZE - i);
- *where = page;
- return 0;
+ memset(copy + i, 0, PAGE_SIZE - i);
+ return copy;
}
char *copy_mount_string(const void __user *data)
@@ -2679,6 +2684,8 @@ long do_mount(const char *dev_name, const char __user *dir_name,
type_page, flags, data_page);
if (!retval && !may_mount())
retval = -EPERM;
+ if (!retval && (flags & MS_MANDLOCK) && !may_mandlock())
+ retval = -EPERM;
if (retval)
goto dput_out;
@@ -2898,7 +2905,7 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
int ret;
char *kernel_type;
char *kernel_dev;
- unsigned long data_page;
+ void *options;
kernel_type = copy_mount_string(type);
ret = PTR_ERR(kernel_type);
@@ -2910,14 +2917,14 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
if (IS_ERR(kernel_dev))
goto out_dev;
- ret = copy_mount_options(data, &data_page);
- if (ret < 0)
+ options = copy_mount_options(data);
+ ret = PTR_ERR(options);
+ if (IS_ERR(options))
goto out_data;
- ret = do_mount(kernel_dev, dir_name, kernel_type, flags,
- (void *) data_page);
+ ret = do_mount(kernel_dev, dir_name, kernel_type, flags, options);
- free_page(data_page);
+ kfree(options);
out_data:
kfree(kernel_dev);
out_dev:
@@ -2941,9 +2948,9 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
}
-int path_is_under(struct path *path1, struct path *path2)
+bool path_is_under(struct path *path1, struct path *path2)
{
- int res;
+ bool res;
read_seqlock_excl(&mount_lock);
res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
read_sequnlock_excl(&mount_lock);
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index f0e3e9e74..b7f8eaeea 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -369,7 +369,7 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
if (!res) {
struct inode *inode = d_inode(dentry);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (finfo.i.dirEntNum == NCP_FINFO(inode)->dirEntNum) {
ncp_new_dentry(dentry);
val=1;
@@ -377,7 +377,7 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
ncp_dbg(2, "found, but dirEntNum changed\n");
ncp_update_inode2(inode, &finfo);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
finished:
@@ -633,15 +633,15 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx,
d_rehash(newdent);
} else {
spin_lock(&dentry->d_lock);
- NCP_FINFO(inode)->flags &= ~NCPI_DIR_CACHE;
+ NCP_FINFO(dir)->flags &= ~NCPI_DIR_CACHE;
spin_unlock(&dentry->d_lock);
}
} else {
struct inode *inode = d_inode(newdent);
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(inode, I_MUTEX_CHILD);
ncp_update_inode2(inode, entry);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
if (ctl.idx >= NCP_DIRCACHE_SIZE) {
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 011324ce9..dd38ca1f2 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -224,10 +224,10 @@ ncp_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
iocb->ki_pos = pos;
if (pos > i_size_read(inode)) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (pos > i_size_read(inode))
i_size_write(inode, pos);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
ncp_dbg(1, "exit %pD2\n", file);
outrel:
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 9605a2f63..1af15fcbe 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -82,7 +82,7 @@ static int init_inodecache(void)
ncp_inode_cachep = kmem_cache_create("ncp_inode_cache",
sizeof(struct ncp_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
if (ncp_inode_cachep == NULL)
return -ENOMEM;
@@ -244,8 +244,7 @@ static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo)
#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS)
static const struct inode_operations ncp_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.setattr = ncp_notify_change,
};
#endif
@@ -283,6 +282,7 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS)
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &ncp_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_data.a_ops = &ncp_symlink_aops;
#endif
} else {
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index c59a59c37..35ab51c04 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -476,6 +476,7 @@ static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
for (i = 0; i < nr_pages; i++)
put_page(arg->layoutupdate_pages[i]);
+ vfree(arg->start_p);
kfree(arg->layoutupdate_pages);
} else {
put_page(arg->layoutupdate_page);
@@ -559,10 +560,15 @@ retry:
if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) {
void *p = start_p, *end = p + arg->layoutupdate_len;
+ struct page *page = NULL;
int i = 0;
- for ( ; p < end; p += PAGE_SIZE)
- arg->layoutupdate_pages[i++] = vmalloc_to_page(p);
+ arg->start_p = start_p;
+ for ( ; p < end; p += PAGE_SIZE) {
+ page = vmalloc_to_page(p);
+ arg->layoutupdate_pages[i++] = page;
+ get_page(page);
+ }
}
dprintk("%s found %zu ranges\n", __func__, count);
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 807eb6ef4..f0939d097 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -83,8 +83,11 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
res = htonl(NFS4ERR_BADHANDLE);
inode = nfs_delegation_find_inode(cps->clp, &args->fh);
- if (inode == NULL)
+ if (inode == NULL) {
+ trace_nfs4_cb_recall(cps->clp, &args->fh, NULL,
+ &args->stateid, -ntohl(res));
goto out;
+ }
/* Set up a helper thread to actually return the delegation */
switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
case 0:
@@ -96,7 +99,8 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
default:
res = htonl(NFS4ERR_RESOURCE);
}
- trace_nfs4_recall_delegation(inode, -ntohl(res));
+ trace_nfs4_cb_recall(cps->clp, &args->fh, inode,
+ &args->stateid, -ntohl(res));
iput(inode);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
@@ -160,6 +164,22 @@ static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp,
return lo;
}
+/*
+ * Enforce RFC5661 section 12.5.5.2.1. (Layout Recall and Return Sequencing)
+ */
+static bool pnfs_check_stateid_sequence(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *new)
+{
+ u32 oldseq, newseq;
+
+ oldseq = be32_to_cpu(lo->plh_stateid.seqid);
+ newseq = be32_to_cpu(new->seqid);
+
+ if (newseq > oldseq + 1)
+ return false;
+ return true;
+}
+
static u32 initiate_file_draining(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
@@ -169,34 +189,52 @@ static u32 initiate_file_draining(struct nfs_client *clp,
LIST_HEAD(free_me_list);
lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid);
- if (!lo)
+ if (!lo) {
+ trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, NULL,
+ &args->cbl_stateid, -rv);
goto out;
+ }
ino = lo->plh_inode;
spin_lock(&ino->i_lock);
+ if (!pnfs_check_stateid_sequence(lo, &args->cbl_stateid)) {
+ rv = NFS4ERR_DELAY;
+ goto unlock;
+ }
pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
spin_unlock(&ino->i_lock);
pnfs_layoutcommit_inode(ino, false);
spin_lock(&ino->i_lock);
- if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
- pnfs_mark_matching_lsegs_invalid(lo, &free_me_list,
- &args->cbl_range)) {
+ /*
+ * Enforce RFC5661 Section 12.5.5.2.1.5 (Bulk Recall and Return)
+ */
+ if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
rv = NFS4ERR_DELAY;
goto unlock;
}
+ if (pnfs_mark_matching_lsegs_return(lo, &free_me_list,
+ &args->cbl_range)) {
+ rv = NFS4_OK;
+ goto unlock;
+ }
+
if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo,
&args->cbl_range);
}
+ pnfs_mark_layout_returned_if_empty(lo);
unlock:
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&free_me_list);
+ /* Free all lsegs that are attached to commit buckets */
+ nfs_commit_inode(ino, 0);
pnfs_put_layout_hdr(lo);
- trace_nfs4_cb_layoutrecall_inode(clp, &args->cbl_fh, ino, -rv);
+ trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, ino,
+ &args->cbl_stateid, -rv);
iput(ino);
out:
return rv;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ce5a21861..9cce67043 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -940,7 +940,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
filp, offset, whence);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
switch (whence) {
case 1:
offset += filp->f_pos;
@@ -957,7 +957,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
dir_ctx->duped = 0;
}
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return offset;
}
@@ -972,9 +972,9 @@ static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return 0;
}
@@ -1894,15 +1894,14 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
attr.ia_mode = S_IFLNK | S_IRWXUGO;
attr.ia_valid = ATTR_MODE;
- page = alloc_page(GFP_HIGHUSER);
+ page = alloc_page(GFP_USER);
if (!page)
return -ENOMEM;
- kaddr = kmap_atomic(page);
+ kaddr = page_address(page);
memcpy(kaddr, symname, pathlen);
if (pathlen < PAGE_SIZE)
memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
- kunmap_atomic(kaddr);
trace_nfs_symlink_enter(dir, dentry);
error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
@@ -2432,6 +2431,20 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
}
EXPORT_SYMBOL_GPL(nfs_may_open);
+static int nfs_execute_ok(struct inode *inode, int mask)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ int ret;
+
+ if (mask & MAY_NOT_BLOCK)
+ ret = nfs_revalidate_inode_rcu(server, inode);
+ else
+ ret = nfs_revalidate_inode(server, inode);
+ if (ret == 0 && !execute_ok(inode))
+ ret = -EACCES;
+ return ret;
+}
+
int nfs_permission(struct inode *inode, int mask)
{
struct rpc_cred *cred;
@@ -2449,6 +2462,9 @@ int nfs_permission(struct inode *inode, int mask)
case S_IFLNK:
goto out;
case S_IFREG:
+ if ((mask & MAY_OPEN) &&
+ nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN))
+ return 0;
break;
case S_IFDIR:
/*
@@ -2481,8 +2497,8 @@ force_lookup:
res = PTR_ERR(cred);
}
out:
- if (!res && (mask & MAY_EXEC) && !execute_ok(inode))
- res = -EACCES;
+ if (!res && (mask & MAY_EXEC))
+ res = nfs_execute_ok(inode, mask);
dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n",
inode->i_sb->s_id, inode->i_ino, mask, res);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 4b1d08f56..7a0cfd326 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -117,12 +117,6 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
return atomic_dec_and_test(&dreq->io_count);
}
-void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq)
-{
- dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
-}
-EXPORT_SYMBOL_GPL(nfs_direct_set_resched_writes);
-
static void
nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr)
{
@@ -586,7 +580,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
if (!count)
goto out;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
result = nfs_sync_mapping(mapping);
if (result)
goto out_unlock;
@@ -614,7 +608,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
NFS_I(inode)->read_io += count;
result = nfs_direct_read_schedule_iovec(dreq, iter, pos);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (!result) {
result = nfs_direct_wait(dreq);
@@ -628,7 +622,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
out_release:
nfs_direct_req_release(dreq);
out_unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
out:
return result;
}
@@ -670,6 +664,10 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
req = nfs_list_entry(reqs.next);
nfs_direct_setup_mirroring(dreq, &desc, req);
+ if (desc.pg_error < 0) {
+ list_splice_init(&reqs, &failed);
+ goto out_failed;
+ }
list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
if (!nfs_pageio_add_request(&desc, req)) {
@@ -677,13 +675,17 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
nfs_list_add_request(req, &failed);
spin_lock(cinfo.lock);
dreq->flags = 0;
- dreq->error = -EIO;
+ if (desc.pg_error < 0)
+ dreq->error = desc.pg_error;
+ else
+ dreq->error = -EIO;
spin_unlock(cinfo.lock);
}
nfs_release_request(req);
}
nfs_pageio_complete(&desc);
+out_failed:
while (!list_empty(&failed)) {
req = nfs_list_entry(failed.next);
nfs_list_remove_request(req);
@@ -727,14 +729,20 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
nfs_direct_write_complete(dreq, data->inode);
}
-static void nfs_direct_error_cleanup(struct nfs_inode *nfsi)
+static void nfs_direct_resched_write(struct nfs_commit_info *cinfo,
+ struct nfs_page *req)
{
- /* There is no lock to clear */
+ struct nfs_direct_req *dreq = cinfo->dreq;
+
+ spin_lock(&dreq->lock);
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ spin_unlock(&dreq->lock);
+ nfs_mark_request_commit(req, NULL, cinfo, 0);
}
static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
.completion = nfs_direct_commit_complete,
- .error_cleanup = nfs_direct_error_cleanup,
+ .resched_write = nfs_direct_resched_write,
};
static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
@@ -839,10 +847,25 @@ static void nfs_write_sync_pgio_error(struct list_head *head)
}
}
+static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
+{
+ struct nfs_direct_req *dreq = hdr->dreq;
+
+ spin_lock(&dreq->lock);
+ if (dreq->error == 0) {
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ /* fake unstable write to let common nfs resend pages */
+ hdr->verf.committed = NFS_UNSTABLE;
+ hdr->good_bytes = hdr->args.count;
+ }
+ spin_unlock(&dreq->lock);
+}
+
static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
.error_cleanup = nfs_write_sync_pgio_error,
.init_hdr = nfs_direct_pgio_init,
.completion = nfs_direct_write_completion,
+ .reschedule_io = nfs_direct_write_reschedule_io,
};
@@ -900,6 +923,11 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
}
nfs_direct_setup_mirroring(dreq, &desc, req);
+ if (desc.pg_error < 0) {
+ nfs_free_request(req);
+ result = desc.pg_error;
+ break;
+ }
nfs_lock_request(req);
req->wb_index = pos >> PAGE_SHIFT;
@@ -977,7 +1005,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
pos = iocb->ki_pos;
end = (pos + iov_iter_count(iter) - 1) >> PAGE_CACHE_SHIFT;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
result = nfs_sync_mapping(mapping);
if (result)
@@ -1017,7 +1045,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
pos >> PAGE_CACHE_SHIFT, end);
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (!result) {
result = nfs_direct_wait(dreq);
@@ -1038,7 +1066,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
out_release:
nfs_direct_req_release(dreq);
out_unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return result;
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 93e236429..748bb813b 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -278,9 +278,9 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret != 0)
break;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = nfs_file_fsync_commit(file, start, end, datasync);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/*
* If nfs_file_fsync_commit detected a server reboot, then
* resend all dirty pages that might have been covered by
@@ -514,7 +514,7 @@ static void nfs_check_dirty_writeback(struct page *page,
* so it will not block due to pages that will shortly be freeable.
*/
nfsi = NFS_I(mapping->host);
- if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) {
+ if (atomic_read(&nfsi->commit_info.rpcs_out)) {
*writeback = true;
return;
}
@@ -545,7 +545,7 @@ static int nfs_launder_page(struct page *page)
inode->i_ino, (long long)page_offset(page));
nfs_fscache_wait_on_page_write(nfsi, page);
- return nfs_wb_page(inode, page);
+ return nfs_wb_launder_page(inode, page);
}
static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
@@ -756,7 +756,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
l_ctx = nfs_get_lock_context(nfs_file_open_context(filp));
if (!IS_ERR(l_ctx)) {
- status = nfs_iocounter_wait(&l_ctx->io_count);
+ status = nfs_iocounter_wait(l_ctx);
nfs_put_lock_context(l_ctx);
if (status < 0)
return status;
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 02ec07973..3384dc8e6 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -202,6 +202,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,
task->tk_status);
nfs4_mark_deviceid_unavailable(devid);
pnfs_error_mark_layout_for_return(inode, lseg);
+ pnfs_set_lo_fail(lseg);
rpc_wake_up(&tbl->slot_tbl_waitq);
/* fall through */
default:
@@ -883,13 +884,19 @@ static void
filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
- if (!pgio->pg_lseg)
+ if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
NFS4_MAX_UINT64,
IOMODE_READ,
GFP_KERNEL);
+ if (IS_ERR(pgio->pg_lseg)) {
+ pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ return;
+ }
+ }
/* If no lseg, fall back to read through mds */
if (pgio->pg_lseg == NULL)
nfs_pageio_reset_read_mds(pgio);
@@ -902,13 +909,20 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_commit_info cinfo;
int status;
- if (!pgio->pg_lseg)
+ if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
NFS4_MAX_UINT64,
IOMODE_RW,
GFP_NOFS);
+ if (IS_ERR(pgio->pg_lseg)) {
+ pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ return;
+ }
+ }
+
/* If no lseg, fall back to write through mds */
if (pgio->pg_lseg == NULL)
goto out_mds;
@@ -957,7 +971,7 @@ filelayout_mark_request_commit(struct nfs_page *req,
u32 i, j;
if (fl->commit_through_mds) {
- nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
+ nfs_request_add_commit_list(req, cinfo);
} else {
/* Note that we are calling nfs4_fl_calc_j_index on each page
* that ends up being committed to a data server. An attractive
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 2a2e2d8dd..0cb1abd53 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -505,9 +505,17 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
}
p = xdr_inline_decode(&stream, 4);
- if (p)
- fls->flags = be32_to_cpup(p);
+ if (!p)
+ goto out_sort_mirrors;
+ fls->flags = be32_to_cpup(p);
+
+ p = xdr_inline_decode(&stream, 4);
+ if (!p)
+ goto out_sort_mirrors;
+ for (i=0; i < fls->mirror_array_cnt; i++)
+ fls->mirror_array[i]->report_interval = be32_to_cpup(p);
+out_sort_mirrors:
ff_layout_sort_mirrors(fls);
rc = ff_layout_check_layout(lgr);
if (rc)
@@ -603,7 +611,9 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
mirror->start_time = now;
if (ktime_equal(mirror->last_report_time, notime))
mirror->last_report_time = now;
- if (layoutstats_timer != 0)
+ if (mirror->report_interval != 0)
+ report_interval = (s64)mirror->report_interval * 1000LL;
+ else if (layoutstats_timer != 0)
report_interval = (s64)layoutstats_timer * 1000LL;
if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >=
report_interval) {
@@ -785,13 +795,19 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
int ds_idx;
/* Use full layout for now */
- if (!pgio->pg_lseg)
+ if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
NFS4_MAX_UINT64,
IOMODE_READ,
GFP_KERNEL);
+ if (IS_ERR(pgio->pg_lseg)) {
+ pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ return;
+ }
+ }
/* If no lseg, fall back to read through mds */
if (pgio->pg_lseg == NULL)
goto out_mds;
@@ -825,13 +841,19 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
int i;
int status;
- if (!pgio->pg_lseg)
+ if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
NFS4_MAX_UINT64,
IOMODE_RW,
GFP_NOFS);
+ if (IS_ERR(pgio->pg_lseg)) {
+ pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ return;
+ }
+ }
/* If no lseg, fall back to write through mds */
if (pgio->pg_lseg == NULL)
goto out_mds;
@@ -867,18 +889,25 @@ static unsigned int
ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
- if (!pgio->pg_lseg)
+ if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
NFS4_MAX_UINT64,
IOMODE_RW,
GFP_NOFS);
+ if (IS_ERR(pgio->pg_lseg)) {
+ pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ goto out;
+ }
+ }
if (pgio->pg_lseg)
return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg);
/* no lseg means that pnfs is not in use, so no mirroring here */
nfs_pageio_reset_write_mds(pgio);
+out:
return 1;
}
@@ -912,18 +941,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
hdr->args.count,
(unsigned long long)hdr->args.offset);
- if (!hdr->dreq) {
- struct nfs_open_context *ctx;
-
- ctx = nfs_list_entry(hdr->pages.next)->wb_context;
- set_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
- hdr->completion_ops->error_cleanup(&hdr->pages);
- } else {
- nfs_direct_set_resched_writes(hdr->dreq);
- /* fake unstable write to let common nfs resend pages */
- hdr->verf.committed = NFS_UNSTABLE;
- hdr->good_bytes = hdr->args.count;
- }
+ hdr->completion_ops->reschedule_io(hdr);
return;
}
@@ -1101,7 +1119,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
return -NFS4ERR_RESET_TO_PNFS;
out_retry:
task->tk_status = 0;
- rpc_restart_call(task);
+ rpc_restart_call_prepare(task);
rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
return -EAGAIN;
}
@@ -1159,6 +1177,14 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
}
}
+ switch (status) {
+ case NFS4ERR_DELAY:
+ case NFS4ERR_GRACE:
+ return;
+ default:
+ break;
+ }
+
mirror = FF_LAYOUT_COMP(lseg, idx);
err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
mirror, offset, length, status, opnum,
@@ -1189,7 +1215,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
hdr->pgio_mirror_idx + 1,
&hdr->pgio_mirror_idx))
goto out_eagain;
- set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+ set_bit(NFS_LAYOUT_RETURN_REQUESTED,
&hdr->lseg->pls_layout->plh_flags);
pnfs_read_resend_pnfs(hdr);
return task->tk_status;
@@ -1242,14 +1268,31 @@ ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx)
return ff_layout_test_devid_unavailable(node);
}
-static int ff_layout_read_prepare_common(struct rpc_task *task,
- struct nfs_pgio_header *hdr)
+static void ff_layout_read_record_layoutstats_start(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
{
+ if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags))
+ return;
nfs4_ff_layout_stat_io_start_read(hdr->inode,
FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
hdr->args.count,
task->tk_start);
+}
+
+static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
+{
+ if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags))
+ return;
+ nfs4_ff_layout_stat_io_end_read(task,
+ FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+ hdr->args.count,
+ hdr->res.count);
+}
+static int ff_layout_read_prepare_common(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
+{
if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
rpc_exit(task, -EIO);
return -EIO;
@@ -1265,6 +1308,7 @@ static int ff_layout_read_prepare_common(struct rpc_task *task,
}
hdr->pgio_done_cb = ff_layout_read_done_cb;
+ ff_layout_read_record_layoutstats_start(task, hdr);
return 0;
}
@@ -1323,10 +1367,6 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data)
dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
- nfs4_ff_layout_stat_io_end_read(task,
- FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
- hdr->args.count, hdr->res.count);
-
if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
task->tk_status == 0) {
nfs4_sequence_done(task, &hdr->res.seq_res);
@@ -1341,10 +1381,20 @@ static void ff_layout_read_count_stats(struct rpc_task *task, void *data)
{
struct nfs_pgio_header *hdr = data;
+ ff_layout_read_record_layoutstats_done(task, hdr);
rpc_count_iostats_metrics(task,
&NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]);
}
+static void ff_layout_read_release(void *data)
+{
+ struct nfs_pgio_header *hdr = data;
+
+ ff_layout_read_record_layoutstats_done(&hdr->task, hdr);
+ pnfs_generic_rw_release(data);
+}
+
+
static int ff_layout_write_done_cb(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
@@ -1362,15 +1412,12 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
switch (err) {
case -NFS4ERR_RESET_TO_PNFS:
- pnfs_set_retry_layoutget(hdr->lseg->pls_layout);
ff_layout_reset_write(hdr, true);
return task->tk_status;
case -NFS4ERR_RESET_TO_MDS:
- pnfs_clear_retry_layoutget(hdr->lseg->pls_layout);
ff_layout_reset_write(hdr, false);
return task->tk_status;
case -EAGAIN:
- rpc_restart_call_prepare(task);
return -EAGAIN;
}
@@ -1402,11 +1449,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
switch (err) {
case -NFS4ERR_RESET_TO_PNFS:
- pnfs_set_retry_layoutget(data->lseg->pls_layout);
pnfs_generic_prepare_to_resend_writes(data);
return -EAGAIN;
case -NFS4ERR_RESET_TO_MDS:
- pnfs_clear_retry_layoutget(data->lseg->pls_layout);
pnfs_generic_prepare_to_resend_writes(data);
return -EAGAIN;
case -EAGAIN:
@@ -1421,14 +1466,31 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
return 0;
}
-static int ff_layout_write_prepare_common(struct rpc_task *task,
- struct nfs_pgio_header *hdr)
+static void ff_layout_write_record_layoutstats_start(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
{
+ if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags))
+ return;
nfs4_ff_layout_stat_io_start_write(hdr->inode,
FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
hdr->args.count,
task->tk_start);
+}
+
+static void ff_layout_write_record_layoutstats_done(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
+{
+ if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags))
+ return;
+ nfs4_ff_layout_stat_io_end_write(task,
+ FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+ hdr->args.count, hdr->res.count,
+ hdr->res.verf->committed);
+}
+static int ff_layout_write_prepare_common(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
+{
if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
rpc_exit(task, -EIO);
return -EIO;
@@ -1445,6 +1507,7 @@ static int ff_layout_write_prepare_common(struct rpc_task *task,
return -EAGAIN;
}
+ ff_layout_write_record_layoutstats_start(task, hdr);
return 0;
}
@@ -1480,11 +1543,6 @@ static void ff_layout_write_call_done(struct rpc_task *task, void *data)
{
struct nfs_pgio_header *hdr = data;
- nfs4_ff_layout_stat_io_end_write(task,
- FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
- hdr->args.count, hdr->res.count,
- hdr->res.verf->committed);
-
if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
task->tk_status == 0) {
nfs4_sequence_done(task, &hdr->res.seq_res);
@@ -1499,18 +1557,53 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data)
{
struct nfs_pgio_header *hdr = data;
+ ff_layout_write_record_layoutstats_done(task, hdr);
rpc_count_iostats_metrics(task,
&NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]);
}
-static void ff_layout_commit_prepare_common(struct rpc_task *task,
+static void ff_layout_write_release(void *data)
+{
+ struct nfs_pgio_header *hdr = data;
+
+ ff_layout_write_record_layoutstats_done(&hdr->task, hdr);
+ pnfs_generic_rw_release(data);
+}
+
+static void ff_layout_commit_record_layoutstats_start(struct rpc_task *task,
struct nfs_commit_data *cdata)
{
+ if (test_and_set_bit(NFS_IOHDR_STAT, &cdata->flags))
+ return;
nfs4_ff_layout_stat_io_start_write(cdata->inode,
FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
0, task->tk_start);
}
+static void ff_layout_commit_record_layoutstats_done(struct rpc_task *task,
+ struct nfs_commit_data *cdata)
+{
+ struct nfs_page *req;
+ __u64 count = 0;
+
+ if (!test_and_clear_bit(NFS_IOHDR_STAT, &cdata->flags))
+ return;
+
+ if (task->tk_status == 0) {
+ list_for_each_entry(req, &cdata->pages, wb_list)
+ count += req->wb_bytes;
+ }
+ nfs4_ff_layout_stat_io_end_write(task,
+ FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
+ count, count, NFS_FILE_SYNC);
+}
+
+static void ff_layout_commit_prepare_common(struct rpc_task *task,
+ struct nfs_commit_data *cdata)
+{
+ ff_layout_commit_record_layoutstats_start(task, cdata);
+}
+
static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data)
{
ff_layout_commit_prepare_common(task, data);
@@ -1531,19 +1624,6 @@ static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data)
static void ff_layout_commit_done(struct rpc_task *task, void *data)
{
- struct nfs_commit_data *cdata = data;
- struct nfs_page *req;
- __u64 count = 0;
-
- if (task->tk_status == 0) {
- list_for_each_entry(req, &cdata->pages, wb_list)
- count += req->wb_bytes;
- }
-
- nfs4_ff_layout_stat_io_end_write(task,
- FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
- count, count, NFS_FILE_SYNC);
-
pnfs_generic_write_commit_done(task, data);
}
@@ -1551,50 +1631,59 @@ static void ff_layout_commit_count_stats(struct rpc_task *task, void *data)
{
struct nfs_commit_data *cdata = data;
+ ff_layout_commit_record_layoutstats_done(task, cdata);
rpc_count_iostats_metrics(task,
&NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]);
}
+static void ff_layout_commit_release(void *data)
+{
+ struct nfs_commit_data *cdata = data;
+
+ ff_layout_commit_record_layoutstats_done(&cdata->task, cdata);
+ pnfs_generic_commit_release(data);
+}
+
static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
.rpc_call_prepare = ff_layout_read_prepare_v3,
.rpc_call_done = ff_layout_read_call_done,
.rpc_count_stats = ff_layout_read_count_stats,
- .rpc_release = pnfs_generic_rw_release,
+ .rpc_release = ff_layout_read_release,
};
static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
.rpc_call_prepare = ff_layout_read_prepare_v4,
.rpc_call_done = ff_layout_read_call_done,
.rpc_count_stats = ff_layout_read_count_stats,
- .rpc_release = pnfs_generic_rw_release,
+ .rpc_release = ff_layout_read_release,
};
static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
.rpc_call_prepare = ff_layout_write_prepare_v3,
.rpc_call_done = ff_layout_write_call_done,
.rpc_count_stats = ff_layout_write_count_stats,
- .rpc_release = pnfs_generic_rw_release,
+ .rpc_release = ff_layout_write_release,
};
static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {
.rpc_call_prepare = ff_layout_write_prepare_v4,
.rpc_call_done = ff_layout_write_call_done,
.rpc_count_stats = ff_layout_write_count_stats,
- .rpc_release = pnfs_generic_rw_release,
+ .rpc_release = ff_layout_write_release,
};
static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = {
.rpc_call_prepare = ff_layout_commit_prepare_v3,
.rpc_call_done = ff_layout_commit_done,
.rpc_count_stats = ff_layout_commit_count_stats,
- .rpc_release = pnfs_generic_commit_release,
+ .rpc_release = ff_layout_commit_release,
};
static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = {
.rpc_call_prepare = ff_layout_commit_prepare_v4,
.rpc_call_done = ff_layout_commit_done,
.rpc_count_stats = ff_layout_commit_count_stats,
- .rpc_release = pnfs_generic_commit_release,
+ .rpc_release = ff_layout_commit_release,
};
static enum pnfs_try_status
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index 2bb08bc6a..dd353bb7d 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -85,6 +85,7 @@ struct nfs4_ff_layout_mirror {
struct nfs4_ff_layoutstat write_stat;
ktime_t start_time;
ktime_t last_report_time;
+ u32 report_interval;
};
struct nfs4_ff_layout_segment {
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index e125e55de..eb370460c 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -218,63 +218,55 @@ static void extend_ds_error(struct nfs4_ff_layout_ds_err *err,
err->length = end - err->offset;
}
-static bool ds_error_can_merge(struct nfs4_ff_layout_ds_err *err, u64 offset,
- u64 length, int status, enum nfs_opnum4 opnum,
- nfs4_stateid *stateid,
- struct nfs4_deviceid *deviceid)
+static int
+ff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1,
+ const struct nfs4_ff_layout_ds_err *e2)
{
- return err->status == status && err->opnum == opnum &&
- nfs4_stateid_match(&err->stateid, stateid) &&
- !memcmp(&err->deviceid, deviceid, sizeof(*deviceid)) &&
- end_offset(err->offset, err->length) >= offset &&
- err->offset <= end_offset(offset, length);
-}
-
-static bool merge_ds_error(struct nfs4_ff_layout_ds_err *old,
- struct nfs4_ff_layout_ds_err *new)
-{
- if (!ds_error_can_merge(old, new->offset, new->length, new->status,
- new->opnum, &new->stateid, &new->deviceid))
- return false;
-
- extend_ds_error(old, new->offset, new->length);
- return true;
+ int ret;
+
+ if (e1->opnum != e2->opnum)
+ return e1->opnum < e2->opnum ? -1 : 1;
+ if (e1->status != e2->status)
+ return e1->status < e2->status ? -1 : 1;
+ ret = memcmp(&e1->stateid, &e2->stateid, sizeof(e1->stateid));
+ if (ret != 0)
+ return ret;
+ ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid));
+ if (ret != 0)
+ return ret;
+ if (end_offset(e1->offset, e1->length) < e2->offset)
+ return -1;
+ if (e1->offset > end_offset(e2->offset, e2->length))
+ return 1;
+ /* If ranges overlap or are contiguous, they are the same */
+ return 0;
}
-static bool
+static void
ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo,
struct nfs4_ff_layout_ds_err *dserr)
{
- struct nfs4_ff_layout_ds_err *err;
-
- list_for_each_entry(err, &flo->error_list, list) {
- if (merge_ds_error(err, dserr)) {
- return true;
- }
- }
-
- list_add(&dserr->list, &flo->error_list);
- return false;
-}
-
-static bool
-ff_layout_update_ds_error(struct nfs4_flexfile_layout *flo, u64 offset,
- u64 length, int status, enum nfs_opnum4 opnum,
- nfs4_stateid *stateid, struct nfs4_deviceid *deviceid)
-{
- bool found = false;
- struct nfs4_ff_layout_ds_err *err;
-
- list_for_each_entry(err, &flo->error_list, list) {
- if (ds_error_can_merge(err, offset, length, status, opnum,
- stateid, deviceid)) {
- found = true;
- extend_ds_error(err, offset, length);
+ struct nfs4_ff_layout_ds_err *err, *tmp;
+ struct list_head *head = &flo->error_list;
+ int match;
+
+ /* Do insertion sort w/ merges */
+ list_for_each_entry_safe(err, tmp, &flo->error_list, list) {
+ match = ff_ds_error_match(err, dserr);
+ if (match < 0)
+ continue;
+ if (match > 0) {
+ /* Add entry "dserr" _before_ entry "err" */
+ head = &err->list;
break;
}
+ /* Entries match, so merge "err" into "dserr" */
+ extend_ds_error(dserr, err->offset, err->length);
+ list_del(&err->list);
+ kfree(err);
}
- return found;
+ list_add_tail(&dserr->list, head);
}
int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
@@ -283,7 +275,6 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
gfp_t gfp_flags)
{
struct nfs4_ff_layout_ds_err *dserr;
- bool needfree;
if (status == 0)
return 0;
@@ -291,14 +282,6 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
if (mirror->mirror_ds == NULL)
return -EINVAL;
- spin_lock(&flo->generic_hdr.plh_inode->i_lock);
- if (ff_layout_update_ds_error(flo, offset, length, status, opnum,
- &mirror->stateid,
- &mirror->mirror_ds->id_node.deviceid)) {
- spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
- return 0;
- }
- spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
dserr = kmalloc(sizeof(*dserr), gfp_flags);
if (!dserr)
return -ENOMEM;
@@ -313,10 +296,8 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
NFS4_DEVICEID4_SIZE);
spin_lock(&flo->generic_hdr.plh_inode->i_lock);
- needfree = ff_layout_add_ds_error_locked(flo, dserr);
+ ff_layout_add_ds_error_locked(flo, dserr);
spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
- if (needfree)
- kfree(dserr);
return 0;
}
@@ -429,22 +410,14 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
mirror, lseg->pls_range.offset,
lseg->pls_range.length, NFS4ERR_NXIO,
OP_ILLEGAL, GFP_NOIO);
- if (fail_return) {
- pnfs_error_mark_layout_for_return(ino, lseg);
+ if (!fail_return) {
if (ff_layout_has_available_ds(lseg))
- pnfs_set_retry_layoutget(lseg->pls_layout);
- else
- pnfs_clear_retry_layoutget(lseg->pls_layout);
-
- } else {
- if (ff_layout_has_available_ds(lseg))
- set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+ set_bit(NFS_LAYOUT_RETURN_REQUESTED,
&lseg->pls_layout->plh_flags);
- else {
+ else
pnfs_error_mark_layout_for_return(ino, lseg);
- pnfs_clear_retry_layoutget(lseg->pls_layout);
- }
- }
+ } else
+ pnfs_error_mark_layout_for_return(ino, lseg);
}
out_update_creds:
if (ff_layout_update_mirror_cred(mirror, ds))
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 3e2071a17..86faecf8f 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -71,19 +71,25 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
return nfs_fileid_to_ino_t(fattr->fileid);
}
-/**
- * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks
- * @word: long word containing the bit lock
- */
-int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
+static int nfs_wait_killable(int mode)
{
freezable_schedule_unsafe();
if (signal_pending_state(mode, current))
return -ERESTARTSYS;
return 0;
}
+
+int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
+{
+ return nfs_wait_killable(mode);
+}
EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);
+int nfs_wait_atomic_killable(atomic_t *p)
+{
+ return nfs_wait_killable(TASK_KILLABLE);
+}
+
/**
* nfs_compat_user_ino64 - returns the user-visible inode number
* @fileid: 64-bit fileid
@@ -408,9 +414,10 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
inode->i_fop = NULL;
inode->i_flags |= S_AUTOMOUNT;
}
- } else if (S_ISLNK(inode->i_mode))
+ } else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &nfs_symlink_inode_operations;
- else
+ inode_nohighmem(inode);
+ } else
init_special_inode(inode, inode->i_mode, fattr->rdev);
memset(&inode->i_atime, 0, sizeof(inode->i_atime));
@@ -654,9 +661,9 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
trace_nfs_getattr_enter(inode);
/* Flush out writes to the server in order to update c/mtime. */
if (S_ISREG(inode->i_mode)) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = nfs_sync_inode(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (err)
goto out;
}
@@ -699,7 +706,7 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
l_ctx->lockowner.l_owner = current->files;
l_ctx->lockowner.l_pid = current->tgid;
INIT_LIST_HEAD(&l_ctx->list);
- nfs_iocounter_init(&l_ctx->io_count);
+ atomic_set(&l_ctx->io_count, 0);
}
static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx)
@@ -912,6 +919,12 @@ void nfs_file_clear_open_context(struct file *filp)
if (ctx) {
struct inode *inode = d_inode(ctx->dentry);
+ /*
+ * We hit a fatal error on a previous write. Try to
+ * write back every page again.
+ */
+ if (ctx->error < 0)
+ invalidate_inode_pages2(inode->i_mapping);
filp->private_data = NULL;
spin_lock(&inode->i_lock);
list_move_tail(&ctx->list, &NFS_I(inode)->open_files);
@@ -1086,6 +1099,27 @@ static bool nfs_mapping_need_revalidate_inode(struct inode *inode)
|| NFS_STALE(inode);
}
+int nfs_revalidate_mapping_rcu(struct inode *inode)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ unsigned long *bitlock = &nfsi->flags;
+ int ret = 0;
+
+ if (IS_SWAPFILE(inode))
+ goto out;
+ if (nfs_mapping_need_revalidate_inode(inode)) {
+ ret = -ECHILD;
+ goto out;
+ }
+ spin_lock(&inode->i_lock);
+ if (test_bit(NFS_INO_INVALIDATING, bitlock) ||
+ (nfsi->cache_validity & NFS_INO_INVALID_DATA))
+ ret = -ECHILD;
+ spin_unlock(&inode->i_lock);
+out:
+ return ret;
+}
+
/**
* __nfs_revalidate_mapping - Revalidate the pagecache
* @inode - pointer to host inode
@@ -1144,9 +1178,9 @@ static int __nfs_revalidate_mapping(struct inode *inode,
spin_unlock(&inode->i_lock);
trace_nfs_invalidate_mapping_enter(inode);
if (may_lock) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = nfs_invalidate_mapping(inode, mapping);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
} else
ret = nfs_invalidate_mapping(inode, mapping);
trace_nfs_invalidate_mapping_exit(inode, ret);
@@ -1935,7 +1969,7 @@ static int __init nfs_init_inodecache(void)
nfs_inode_cachep = kmem_cache_create("nfs_inode_cache",
sizeof(struct nfs_inode),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
if (nfs_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 9dea85f7f..9a547aa3e 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -238,7 +238,7 @@ extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr,
void (*release)(struct nfs_pgio_header *hdr));
void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
-int nfs_iocounter_wait(struct nfs_io_counter *c);
+int nfs_iocounter_wait(struct nfs_lock_context *l_ctx);
extern const struct nfs_pageio_ops nfs_pgio_rw_ops;
struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *);
@@ -252,18 +252,18 @@ void nfs_free_request(struct nfs_page *req);
struct nfs_pgio_mirror *
nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc);
-static inline void nfs_iocounter_init(struct nfs_io_counter *c)
-{
- c->flags = 0;
- atomic_set(&c->io_count, 0);
-}
-
static inline bool nfs_pgio_has_mirroring(struct nfs_pageio_descriptor *desc)
{
WARN_ON_ONCE(desc->pg_mirror_count < 1);
return desc->pg_mirror_count > 1;
}
+static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1,
+ const struct nfs_open_context *ctx2)
+{
+ return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state;
+}
+
/* nfs2xdr.c */
extern struct rpc_procinfo nfs_procedures[];
extern int nfs2_decode_dirent(struct xdr_stream *,
@@ -380,6 +380,7 @@ extern void nfs_clear_inode(struct inode *);
extern void nfs_evict_inode(struct inode *);
void nfs_zap_acl_cache(struct inode *inode);
extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode);
+extern int nfs_wait_atomic_killable(atomic_t *p);
/* super.c */
extern const struct super_operations nfs_sops;
@@ -483,7 +484,7 @@ void nfs_retry_commit(struct list_head *page_list,
struct nfs_commit_info *cinfo,
u32 ds_commit_idx);
void nfs_commitdata_release(struct nfs_commit_data *data);
-void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
+void nfs_request_add_commit_list(struct nfs_page *req,
struct nfs_commit_info *cinfo);
void nfs_request_add_commit_list_locked(struct nfs_page *req,
struct list_head *dst,
@@ -519,7 +520,6 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
inode_dio_wait(inode);
}
extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
-extern void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq);
/* nfs4proc.c */
extern void __nfs4_read_done_cb(struct nfs_pgio_header *);
@@ -696,9 +696,32 @@ static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh)
{
return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size);
}
+static inline u32 nfs_stateid_hash(const nfs4_stateid *stateid)
+{
+ return ~crc32_le(0xFFFFFFFF, &stateid->other[0],
+ NFS4_STATEID_OTHER_SIZE);
+}
#else
static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh)
{
return 0;
}
+static inline u32 nfs_stateid_hash(nfs4_stateid *stateid)
+{
+ return 0;
+}
#endif
+
+static inline bool nfs_error_is_fatal(int err)
+{
+ switch (err) {
+ case -ERESTARTSYS:
+ case -EIO:
+ case -ENOSPC:
+ case -EROFS:
+ case -E2BIG:
+ return true;
+ default:
+ return false;
+ }
+}
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 1ebe2fc7c..17c0fa1ec 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -284,12 +284,12 @@ nfs3_listxattr(struct dentry *dentry, char *data, size_t size)
int error;
error = nfs3_list_one_acl(inode, ACL_TYPE_ACCESS,
- POSIX_ACL_XATTR_ACCESS, data, size, &result);
+ XATTR_NAME_POSIX_ACL_ACCESS, data, size, &result);
if (error)
return error;
error = nfs3_list_one_acl(inode, ACL_TYPE_DEFAULT,
- POSIX_ACL_XATTR_DEFAULT, data, size, &result);
+ XATTR_NAME_POSIX_ACL_DEFAULT, data, size, &result);
if (error)
return error;
return result;
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 6b1ce9825..dff83460e 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -16,29 +16,8 @@
#define NFSDBG_FACILITY NFSDBG_PROC
-static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file,
- fmode_t fmode)
-{
- struct nfs_open_context *open;
- struct nfs_lock_context *lock;
- int ret;
-
- open = get_nfs_open_context(nfs_file_open_context(file));
- lock = nfs_get_lock_context(open);
- if (IS_ERR(lock)) {
- put_nfs_open_context(open);
- return PTR_ERR(lock);
- }
-
- ret = nfs4_set_rw_stateid(dst, open, lock, fmode);
-
- nfs_put_lock_context(lock);
- put_nfs_open_context(open);
- return ret;
-}
-
static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
- loff_t offset, loff_t len)
+ struct nfs_lock_context *lock, loff_t offset, loff_t len)
{
struct inode *inode = file_inode(filep);
struct nfs_server *server = NFS_SERVER(inode);
@@ -56,7 +35,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
msg->rpc_argp = &args;
msg->rpc_resp = &res;
- status = nfs42_set_rw_stateid(&args.falloc_stateid, filep, FMODE_WRITE);
+ status = nfs4_set_rw_stateid(&args.falloc_stateid, lock->open_context,
+ lock, FMODE_WRITE);
if (status)
return status;
@@ -78,15 +58,26 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
{
struct nfs_server *server = NFS_SERVER(file_inode(filep));
struct nfs4_exception exception = { };
+ struct nfs_lock_context *lock;
int err;
+ lock = nfs_get_lock_context(nfs_file_open_context(filep));
+ if (IS_ERR(lock))
+ return PTR_ERR(lock);
+
+ exception.inode = file_inode(filep);
+ exception.state = lock->open_context->state;
+
do {
- err = _nfs42_proc_fallocate(msg, filep, offset, len);
- if (err == -ENOTSUPP)
- return -EOPNOTSUPP;
+ err = _nfs42_proc_fallocate(msg, filep, lock, offset, len);
+ if (err == -ENOTSUPP) {
+ err = -EOPNOTSUPP;
+ break;
+ }
err = nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
+ nfs_put_lock_context(lock);
return err;
}
@@ -101,13 +92,13 @@ int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len)
if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE))
return -EOPNOTSUPP;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = nfs42_proc_fallocate(&msg, filep, offset, len);
if (err == -EOPNOTSUPP)
NFS_SERVER(inode)->caps &= ~NFS_CAP_ALLOCATE;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
@@ -123,7 +114,7 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
return -EOPNOTSUPP;
nfs_wb_all(inode);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = nfs42_proc_fallocate(&msg, filep, offset, len);
if (err == 0)
@@ -131,11 +122,12 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
if (err == -EOPNOTSUPP)
NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
-static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
+static loff_t _nfs42_proc_llseek(struct file *filep,
+ struct nfs_lock_context *lock, loff_t offset, int whence)
{
struct inode *inode = file_inode(filep);
struct nfs42_seek_args args = {
@@ -156,7 +148,8 @@ static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
if (!nfs_server_capable(inode, NFS_CAP_SEEK))
return -ENOTSUPP;
- status = nfs42_set_rw_stateid(&args.sa_stateid, filep, FMODE_READ);
+ status = nfs4_set_rw_stateid(&args.sa_stateid, lock->open_context,
+ lock, FMODE_READ);
if (status)
return status;
@@ -175,17 +168,28 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
{
struct nfs_server *server = NFS_SERVER(file_inode(filep));
struct nfs4_exception exception = { };
+ struct nfs_lock_context *lock;
loff_t err;
+ lock = nfs_get_lock_context(nfs_file_open_context(filep));
+ if (IS_ERR(lock))
+ return PTR_ERR(lock);
+
+ exception.inode = file_inode(filep);
+ exception.state = lock->open_context->state;
+
do {
- err = _nfs42_proc_llseek(filep, offset, whence);
+ err = _nfs42_proc_llseek(filep, lock, offset, whence);
if (err >= 0)
break;
- if (err == -ENOTSUPP)
- return -EOPNOTSUPP;
+ if (err == -ENOTSUPP) {
+ err = -EOPNOTSUPP;
+ break;
+ }
err = nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
+ nfs_put_lock_context(lock);
return err;
}
@@ -204,6 +208,8 @@ static void
nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
{
struct nfs42_layoutstat_data *data = calldata;
+ struct inode *inode = data->inode;
+ struct pnfs_layout_hdr *lo;
if (!nfs4_sequence_done(task, &data->res.seq_res))
return;
@@ -211,12 +217,35 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
switch (task->tk_status) {
case 0:
break;
+ case -NFS4ERR_EXPIRED:
+ case -NFS4ERR_STALE_STATEID:
+ case -NFS4ERR_OLD_STATEID:
+ case -NFS4ERR_BAD_STATEID:
+ spin_lock(&inode->i_lock);
+ lo = NFS_I(inode)->layout;
+ if (lo && nfs4_stateid_match(&data->args.stateid,
+ &lo->plh_stateid)) {
+ LIST_HEAD(head);
+
+ /*
+ * Mark the bad layout state as invalid, then retry
+ * with the current stateid.
+ */
+ set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+ pnfs_mark_matching_lsegs_invalid(lo, &head, NULL);
+ spin_unlock(&inode->i_lock);
+ pnfs_free_lseg_list(&head);
+ } else
+ spin_unlock(&inode->i_lock);
+ break;
case -ENOTSUPP:
case -EOPNOTSUPP:
- NFS_SERVER(data->inode)->caps &= ~NFS_CAP_LAYOUTSTATS;
+ NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTSTATS;
default:
- dprintk("%s server returns %d\n", __func__, task->tk_status);
+ break;
}
+
+ dprintk("%s server returns %d\n", __func__, task->tk_status);
}
static void
@@ -273,8 +302,9 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server,
}
static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
- struct file *dst_f, loff_t src_offset,
- loff_t dst_offset, loff_t count)
+ struct file *dst_f, struct nfs_lock_context *src_lock,
+ struct nfs_lock_context *dst_lock, loff_t src_offset,
+ loff_t dst_offset, loff_t count)
{
struct inode *src_inode = file_inode(src_f);
struct inode *dst_inode = file_inode(dst_f);
@@ -295,11 +325,13 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
msg->rpc_argp = &args;
msg->rpc_resp = &res;
- status = nfs42_set_rw_stateid(&args.src_stateid, src_f, FMODE_READ);
+ status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context,
+ src_lock, FMODE_READ);
if (status)
return status;
- status = nfs42_set_rw_stateid(&args.dst_stateid, dst_f, FMODE_WRITE);
+ status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,
+ dst_lock, FMODE_WRITE);
if (status)
return status;
@@ -324,22 +356,48 @@ int nfs42_proc_clone(struct file *src_f, struct file *dst_f,
};
struct inode *inode = file_inode(src_f);
struct nfs_server *server = NFS_SERVER(file_inode(src_f));
- struct nfs4_exception exception = { };
- int err;
+ struct nfs_lock_context *src_lock;
+ struct nfs_lock_context *dst_lock;
+ struct nfs4_exception src_exception = { };
+ struct nfs4_exception dst_exception = { };
+ int err, err2;
if (!nfs_server_capable(inode, NFS_CAP_CLONE))
return -EOPNOTSUPP;
+ src_lock = nfs_get_lock_context(nfs_file_open_context(src_f));
+ if (IS_ERR(src_lock))
+ return PTR_ERR(src_lock);
+
+ src_exception.inode = file_inode(src_f);
+ src_exception.state = src_lock->open_context->state;
+
+ dst_lock = nfs_get_lock_context(nfs_file_open_context(dst_f));
+ if (IS_ERR(dst_lock)) {
+ err = PTR_ERR(dst_lock);
+ goto out_put_src_lock;
+ }
+
+ dst_exception.inode = file_inode(dst_f);
+ dst_exception.state = dst_lock->open_context->state;
+
do {
- err = _nfs42_proc_clone(&msg, src_f, dst_f, src_offset,
- dst_offset, count);
+ err = _nfs42_proc_clone(&msg, src_f, dst_f, src_lock, dst_lock,
+ src_offset, dst_offset, count);
if (err == -ENOTSUPP || err == -EOPNOTSUPP) {
NFS_SERVER(inode)->caps &= ~NFS_CAP_CLONE;
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ break;
}
- err = nfs4_handle_exception(server, err, &exception);
- } while (exception.retry);
- return err;
+ err2 = nfs4_handle_exception(server, err, &src_exception);
+ err = nfs4_handle_exception(server, err, &dst_exception);
+ if (!err)
+ err = err2;
+ } while (src_exception.retry || dst_exception.retry);
+ nfs_put_lock_context(dst_lock);
+out_put_src_lock:
+ nfs_put_lock_context(src_lock);
+ return err;
}
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index db9b5fea5..57ca1c803 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -141,11 +141,11 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret != 0)
break;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = nfs_file_fsync_commit(file, start, end, datasync);
if (!ret)
ret = pnfs_sync_inode(inode, !!datasync);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/*
* If nfs_file_fsync_commit detected a server reboot, then
* resend all dirty pages that might have been covered by
@@ -195,75 +195,37 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
return nfs42_proc_allocate(filep, offset, len);
}
-static noinline long
-nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
- u64 src_off, u64 dst_off, u64 count)
+static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
+ struct file *dst_file, loff_t dst_off, u64 count)
{
struct inode *dst_inode = file_inode(dst_file);
struct nfs_server *server = NFS_SERVER(dst_inode);
- struct fd src_file;
- struct inode *src_inode;
+ struct inode *src_inode = file_inode(src_file);
unsigned int bs = server->clone_blksize;
bool same_inode = false;
int ret;
- /* dst file must be opened for writing */
- if (!(dst_file->f_mode & FMODE_WRITE))
- return -EINVAL;
-
- ret = mnt_want_write_file(dst_file);
- if (ret)
- return ret;
-
- src_file = fdget(srcfd);
- if (!src_file.file) {
- ret = -EBADF;
- goto out_drop_write;
- }
-
- src_inode = file_inode(src_file.file);
-
- if (src_inode == dst_inode)
- same_inode = true;
-
- /* src file must be opened for reading */
- if (!(src_file.file->f_mode & FMODE_READ))
- goto out_fput;
-
- /* src and dst must be regular files */
- ret = -EISDIR;
- if (!S_ISREG(src_inode->i_mode) || !S_ISREG(dst_inode->i_mode))
- goto out_fput;
-
- ret = -EXDEV;
- if (src_file.file->f_path.mnt != dst_file->f_path.mnt ||
- src_inode->i_sb != dst_inode->i_sb)
- goto out_fput;
-
/* check alignment w.r.t. clone_blksize */
ret = -EINVAL;
if (bs) {
if (!IS_ALIGNED(src_off, bs) || !IS_ALIGNED(dst_off, bs))
- goto out_fput;
+ goto out;
if (!IS_ALIGNED(count, bs) && i_size_read(src_inode) != (src_off + count))
- goto out_fput;
+ goto out;
}
- /* verify if ranges are overlapped within the same file */
- if (same_inode) {
- if (dst_off + count > src_off && dst_off < src_off + count)
- goto out_fput;
- }
+ if (src_inode == dst_inode)
+ same_inode = true;
/* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */
if (same_inode) {
- mutex_lock(&src_inode->i_mutex);
+ inode_lock(src_inode);
} else if (dst_inode < src_inode) {
- mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(dst_inode, I_MUTEX_PARENT);
+ inode_lock_nested(src_inode, I_MUTEX_CHILD);
} else {
- mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(src_inode, I_MUTEX_PARENT);
+ inode_lock_nested(dst_inode, I_MUTEX_CHILD);
}
/* flush all pending writes on both src and dst so that server
@@ -275,7 +237,7 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
if (ret)
goto out_unlock;
- ret = nfs42_proc_clone(src_file.file, dst_file, src_off, dst_off, count);
+ ret = nfs42_proc_clone(src_file, dst_file, src_off, dst_off, count);
/* truncate inode page cache of the dst range so that future reads can fetch
* new data from server */
@@ -284,45 +246,17 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
out_unlock:
if (same_inode) {
- mutex_unlock(&src_inode->i_mutex);
+ inode_unlock(src_inode);
} else if (dst_inode < src_inode) {
- mutex_unlock(&src_inode->i_mutex);
- mutex_unlock(&dst_inode->i_mutex);
+ inode_unlock(src_inode);
+ inode_unlock(dst_inode);
} else {
- mutex_unlock(&dst_inode->i_mutex);
- mutex_unlock(&src_inode->i_mutex);
+ inode_unlock(dst_inode);
+ inode_unlock(src_inode);
}
-out_fput:
- fdput(src_file);
-out_drop_write:
- mnt_drop_write_file(dst_file);
+out:
return ret;
}
-
-static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp)
-{
- struct btrfs_ioctl_clone_range_args args;
-
- if (copy_from_user(&args, argp, sizeof(args)))
- return -EFAULT;
-
- return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_offset,
- args.dest_offset, args.src_length);
-}
-
-long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- void __user *argp = (void __user *)arg;
-
- switch (cmd) {
- case BTRFS_IOC_CLONE:
- return nfs42_ioctl_clone(file, arg, 0, 0, 0);
- case BTRFS_IOC_CLONE_RANGE:
- return nfs42_ioctl_clone_range(file, argp);
- }
-
- return -ENOTTY;
-}
#endif /* CONFIG_NFS_V4_2 */
const struct file_operations nfs4_file_operations = {
@@ -342,8 +276,7 @@ const struct file_operations nfs4_file_operations = {
#ifdef CONFIG_NFS_V4_2
.llseek = nfs4_file_llseek,
.fallocate = nfs42_fallocate,
- .unlocked_ioctl = nfs4_ioctl,
- .compat_ioctl = nfs4_ioctl,
+ .clone_file_range = nfs42_clone_file_range,
#else
.llseek = nfs_file_llseek,
#endif
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 98a441573..14881594d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -208,6 +208,9 @@ static const u32 nfs4_pnfs_open_bitmap[3] = {
| FATTR4_WORD1_TIME_METADATA
| FATTR4_WORD1_TIME_MODIFY,
FATTR4_WORD2_MDSTHRESHOLD
+#ifdef CONFIG_NFS_V4_SECURITY_LABEL
+ | FATTR4_WORD2_SECURITY_LABEL
+#endif
};
static const u32 nfs4_open_noattr_bitmap[3] = {
@@ -1598,6 +1601,7 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
if (!data->rpc_done) {
state = nfs4_try_open_cached(data);
+ trace_nfs4_cached_open(data->state);
goto out;
}
@@ -2015,6 +2019,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
}
return;
unlock_no_action:
+ trace_nfs4_cached_open(data->state);
rcu_read_unlock();
out_no_action:
task->tk_action = NULL;
@@ -2703,6 +2708,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
if (status == 0 && state != NULL)
renew_lease(server, timestamp);
+ trace_nfs4_setattr(inode, &arg.stateid, status);
return status;
}
@@ -2719,7 +2725,6 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
int err;
do {
err = _nfs4_do_setattr(inode, cred, fattr, sattr, state, ilabel, olabel);
- trace_nfs4_setattr(inode, err);
switch (err) {
case -NFS4ERR_OPENMODE:
if (!(sattr->ia_valid & ATTR_SIZE)) {
@@ -5048,7 +5053,6 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp,
static int
nfs4_init_nonuniform_client_string(struct nfs_client *clp)
{
- int result;
size_t len;
char *str;
@@ -5076,7 +5080,7 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp)
return -ENOMEM;
rcu_read_lock();
- result = scnprintf(str, len, "Linux NFSv4.0 %s/%s %s",
+ scnprintf(str, len, "Linux NFSv4.0 %s/%s %s",
clp->cl_ipaddr,
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO));
@@ -5089,7 +5093,6 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp)
static int
nfs4_init_uniquifier_client_string(struct nfs_client *clp)
{
- int result;
size_t len;
char *str;
@@ -5109,7 +5112,7 @@ nfs4_init_uniquifier_client_string(struct nfs_client *clp)
if (!str)
return -ENOMEM;
- result = scnprintf(str, len, "Linux NFSv%u.%u %s/%s",
+ scnprintf(str, len, "Linux NFSv%u.%u %s/%s",
clp->rpc_ops->version, clp->cl_minorversion,
nfs4_client_id_uniquifier,
clp->cl_rpcclient->cl_nodename);
@@ -5120,7 +5123,6 @@ nfs4_init_uniquifier_client_string(struct nfs_client *clp)
static int
nfs4_init_uniform_client_string(struct nfs_client *clp)
{
- int result;
size_t len;
char *str;
@@ -5145,7 +5147,7 @@ nfs4_init_uniform_client_string(struct nfs_client *clp)
if (!str)
return -ENOMEM;
- result = scnprintf(str, len, "Linux NFSv%u.%u %s",
+ scnprintf(str, len, "Linux NFSv%u.%u %s",
clp->rpc_ops->version, clp->cl_minorversion,
clp->cl_rpcclient->cl_nodename);
clp->cl_owner_id = str;
@@ -5384,6 +5386,11 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
if (data == NULL)
return -ENOMEM;
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
+
+ nfs4_state_protect(server->nfs_client,
+ NFS_SP4_MACH_CRED_CLEANUP,
+ &task_setup_data.rpc_client, &msg);
+
data->args.fhandle = &data->fh;
data->args.stateid = &data->stateid;
data->args.bitmask = server->cache_consistency_bitmask;
@@ -5426,7 +5433,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
int err;
do {
err = _nfs4_proc_delegreturn(inode, cred, stateid, issync);
- trace_nfs4_delegreturn(inode, err);
+ trace_nfs4_delegreturn(inode, stateid, err);
switch (err) {
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
@@ -5936,6 +5943,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
data->cancelled = 1;
rpc_put_task(task);
dprintk("%s: done, ret = %d!\n", __func__, ret);
+ trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret);
return ret;
}
@@ -5952,7 +5960,6 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
return 0;
err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM);
- trace_nfs4_lock_reclaim(request, state, F_SETLK, err);
if (err != -NFS4ERR_DELAY)
break;
nfs4_handle_exception(server, err, &exception);
@@ -5979,7 +5986,6 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
return 0;
err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_EXPIRED);
- trace_nfs4_lock_expired(request, state, F_SETLK, err);
switch (err) {
default:
goto out;
@@ -6087,7 +6093,6 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *
do {
err = _nfs4_proc_setlk(state, cmd, request);
- trace_nfs4_set_lock(request, state, cmd, err);
if (err == -NFS4ERR_DENIED)
err = -EAGAIN;
err = nfs4_handle_exception(NFS_SERVER(state->inode),
@@ -6253,9 +6258,6 @@ static int nfs4_xattr_set_nfs4_acl(const struct xattr_handler *handler,
const void *buf, size_t buflen,
int flags)
{
- if (strcmp(key, "") != 0)
- return -EINVAL;
-
return nfs4_proc_set_acl(d_inode(dentry), buf, buflen);
}
@@ -6263,32 +6265,15 @@ static int nfs4_xattr_get_nfs4_acl(const struct xattr_handler *handler,
struct dentry *dentry, const char *key,
void *buf, size_t buflen)
{
- if (strcmp(key, "") != 0)
- return -EINVAL;
-
return nfs4_proc_get_acl(d_inode(dentry), buf, buflen);
}
-static size_t nfs4_xattr_list_nfs4_acl(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_len, const char *name,
- size_t name_len)
+static bool nfs4_xattr_list_nfs4_acl(struct dentry *dentry)
{
- size_t len = sizeof(XATTR_NAME_NFSV4_ACL);
-
- if (!nfs4_server_supports_acls(NFS_SERVER(d_inode(dentry))))
- return 0;
-
- if (list && len <= list_len)
- memcpy(list, XATTR_NAME_NFSV4_ACL, len);
- return len;
+ return nfs4_server_supports_acls(NFS_SERVER(d_inode(dentry)));
}
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
-static inline int nfs4_server_supports_labels(struct nfs_server *server)
-{
- return server->caps & NFS_CAP_SECURITY_LABEL;
-}
static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler,
struct dentry *dentry, const char *key,
@@ -6310,29 +6295,34 @@ static int nfs4_xattr_get_nfs4_label(const struct xattr_handler *handler,
return -EOPNOTSUPP;
}
-static size_t nfs4_xattr_list_nfs4_label(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_len, const char *name,
- size_t name_len)
+static ssize_t
+nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len)
{
- size_t len = 0;
+ int len = 0;
- if (nfs_server_capable(d_inode(dentry), NFS_CAP_SECURITY_LABEL)) {
- len = security_inode_listsecurity(d_inode(dentry), NULL, 0);
- if (list && len <= list_len)
- security_inode_listsecurity(d_inode(dentry), list, len);
+ if (nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) {
+ len = security_inode_listsecurity(inode, list, list_len);
+ if (list_len && len > list_len)
+ return -ERANGE;
}
return len;
}
static const struct xattr_handler nfs4_xattr_nfs4_label_handler = {
.prefix = XATTR_SECURITY_PREFIX,
- .list = nfs4_xattr_list_nfs4_label,
.get = nfs4_xattr_get_nfs4_label,
.set = nfs4_xattr_set_nfs4_label,
};
-#endif
+#else
+
+static ssize_t
+nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len)
+{
+ return 0;
+}
+
+#endif
/*
* nfs_fhget will use either the mounted_on_fileid or the fileid
@@ -6862,10 +6852,13 @@ static const struct nfs41_state_protection nfs4_sp4_mach_cred_request = {
},
.allow.u.words = {
[0] = 1 << (OP_CLOSE) |
+ 1 << (OP_OPEN_DOWNGRADE) |
1 << (OP_LOCKU) |
+ 1 << (OP_DELEGRETURN) |
1 << (OP_COMMIT),
[1] = 1 << (OP_SECINFO - 32) |
1 << (OP_SECINFO_NO_NAME - 32) |
+ 1 << (OP_LAYOUTRETURN - 32) |
1 << (OP_TEST_STATEID - 32) |
1 << (OP_FREE_STATEID - 32) |
1 << (OP_WRITE - 32)
@@ -6930,11 +6923,19 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp,
}
if (test_bit(OP_CLOSE, sp->allow.u.longs) &&
+ test_bit(OP_OPEN_DOWNGRADE, sp->allow.u.longs) &&
+ test_bit(OP_DELEGRETURN, sp->allow.u.longs) &&
test_bit(OP_LOCKU, sp->allow.u.longs)) {
dfprintk(MOUNT, " cleanup mode enabled\n");
set_bit(NFS_SP4_MACH_CRED_CLEANUP, &clp->cl_sp4_flags);
}
+ if (test_bit(OP_LAYOUTRETURN, sp->allow.u.longs)) {
+ dfprintk(MOUNT, " pnfs cleanup mode enabled\n");
+ set_bit(NFS_SP4_MACH_CRED_PNFS_CLEANUP,
+ &clp->cl_sp4_flags);
+ }
+
if (test_bit(OP_SECINFO, sp->allow.u.longs) &&
test_bit(OP_SECINFO_NO_NAME, sp->allow.u.longs)) {
dfprintk(MOUNT, " secinfo mode enabled\n");
@@ -7763,6 +7764,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
struct nfs4_layoutget *lgp = calldata;
struct nfs_server *server = NFS_SERVER(lgp->args.inode);
struct nfs4_session *session = nfs4_get_session(server);
+ int ret;
dprintk("--> %s\n", __func__);
/* Note the is a race here, where a CB_LAYOUTRECALL can come in
@@ -7773,12 +7775,12 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
if (nfs41_setup_sequence(session, &lgp->args.seq_args,
&lgp->res.seq_res, task))
return;
- if (pnfs_choose_layoutget_stateid(&lgp->args.stateid,
+ ret = pnfs_choose_layoutget_stateid(&lgp->args.stateid,
NFS_I(lgp->args.inode)->layout,
&lgp->args.range,
- lgp->args.ctx->state)) {
- rpc_exit(task, NFS4_OK);
- }
+ lgp->args.ctx->state);
+ if (ret < 0)
+ rpc_exit(task, ret);
}
static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
@@ -7798,6 +7800,15 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
switch (task->tk_status) {
case 0:
goto out;
+
+ /*
+ * NFS4ERR_LAYOUTUNAVAILABLE means we are not supposed to use pnfs
+ * on the file. Set tk_status to -ENODATA to tell the upper layer
+ * to retry in-band.
+ */
+ case -NFS4ERR_LAYOUTUNAVAILABLE:
+ task->tk_status = -ENODATA;
+ goto out;
/*
* NFS4ERR_BADLAYOUT means the MDS cannot return a layout of
* length lgp->args.minlength != 0 (see RFC5661 section 18.43.3).
@@ -7994,6 +8005,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
trace_nfs4_layoutget(lgp->args.ctx,
&lgp->args.range,
&lgp->res.range,
+ &lgp->res.stateid,
status);
/* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
if (status == 0 && lgp->res.layoutp->len)
@@ -8050,9 +8062,10 @@ static void nfs4_layoutreturn_release(void *calldata)
dprintk("--> %s\n", __func__);
spin_lock(&lo->plh_inode->i_lock);
+ pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range);
+ pnfs_mark_layout_returned_if_empty(lo);
if (lrp->res.lrs_present)
pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
- pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range);
pnfs_clear_layoutreturn_waitbit(lo);
spin_unlock(&lo->plh_inode->i_lock);
pnfs_free_lseg_list(&freeme);
@@ -8085,6 +8098,10 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
};
int status = 0;
+ nfs4_state_protect(NFS_SERVER(lrp->args.inode)->nfs_client,
+ NFS_SP4_MACH_CRED_PNFS_CLEANUP,
+ &task_setup_data.rpc_client, &msg);
+
dprintk("--> %s\n", __func__);
if (!sync) {
lrp->inode = nfs_igrab_and_active(lrp->args.inode);
@@ -8100,7 +8117,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
return PTR_ERR(task);
if (sync)
status = task->tk_status;
- trace_nfs4_layoutreturn(lrp->args.inode, status);
+ trace_nfs4_layoutreturn(lrp->args.inode, &lrp->args.stateid, status);
dprintk("<-- %s status=%d\n", __func__, status);
rpc_put_task(task);
return status;
@@ -8248,7 +8265,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
return PTR_ERR(task);
if (sync)
status = task->tk_status;
- trace_nfs4_layoutcommit(data->args.inode, status);
+ trace_nfs4_layoutcommit(data->args.inode, &data->args.stateid, status);
dprintk("%s: status %d\n", __func__, status);
rpc_put_task(task);
return status;
@@ -8748,6 +8765,24 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
#endif
};
+ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
+{
+ ssize_t error, error2;
+
+ error = generic_listxattr(dentry, list, size);
+ if (error < 0)
+ return error;
+ if (list) {
+ list += error;
+ size -= error;
+ }
+
+ error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, size);
+ if (error2 < 0)
+ return error2;
+ return error + error2;
+}
+
static const struct inode_operations nfs4_dir_inode_operations = {
.create = nfs_create,
.lookup = nfs_lookup,
@@ -8764,7 +8799,7 @@ static const struct inode_operations nfs4_dir_inode_operations = {
.setattr = nfs_setattr,
.getxattr = generic_getxattr,
.setxattr = generic_setxattr,
- .listxattr = generic_listxattr,
+ .listxattr = nfs4_listxattr,
.removexattr = generic_removexattr,
};
@@ -8774,7 +8809,7 @@ static const struct inode_operations nfs4_file_inode_operations = {
.setattr = nfs_setattr,
.getxattr = generic_getxattr,
.setxattr = generic_setxattr,
- .listxattr = generic_listxattr,
+ .listxattr = nfs4_listxattr,
.removexattr = generic_removexattr,
};
@@ -8833,7 +8868,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
};
static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
- .prefix = XATTR_NAME_NFSV4_ACL,
+ .name = XATTR_NAME_NFSV4_ACL,
.list = nfs4_xattr_list_nfs4_acl,
.get = nfs4_xattr_get_nfs4_acl,
.set = nfs4_xattr_set_nfs4_acl,
diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c
index 0fbd3ab1b..8693d77c4 100644
--- a/fs/nfs/nfs4sysctl.c
+++ b/fs/nfs/nfs4sysctl.c
@@ -12,7 +12,7 @@
#include "nfs4idmap.h"
#include "callback.h"
-static const int nfs_set_port_min = 0;
+static const int nfs_set_port_min;
static const int nfs_set_port_max = 65535;
static struct ctl_table_header *nfs4_callback_sysctl_table;
diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c
index d774335cc..2850bce19 100644
--- a/fs/nfs/nfs4trace.c
+++ b/fs/nfs/nfs4trace.c
@@ -6,6 +6,7 @@
#include "internal.h"
#include "nfs4session.h"
#include "callback.h"
+#include "pnfs.h"
#define CREATE_TRACE_POINTS
#include "nfs4trace.h"
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 671cf68fe..2c8d05dae 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -321,6 +321,7 @@ TRACE_EVENT(nfs4_sequence_done,
__entry->highest_slotid = res->sr_highest_slotid;
__entry->target_highest_slotid =
res->sr_target_highest_slotid;
+ __entry->status_flags = res->sr_status_flags;
__entry->error = res->sr_status;
),
TP_printk(
@@ -399,6 +400,10 @@ DECLARE_EVENT_CLASS(nfs4_open_event,
__field(u64, fileid)
__field(u64, dir)
__string(name, ctx->dentry->d_name.name)
+ __field(int, stateid_seq)
+ __field(u32, stateid_hash)
+ __field(int, openstateid_seq)
+ __field(u32, openstateid_hash)
),
TP_fast_assign(
@@ -409,8 +414,22 @@ DECLARE_EVENT_CLASS(nfs4_open_event,
__entry->flags = flags;
__entry->fmode = (__force unsigned int)ctx->mode;
__entry->dev = ctx->dentry->d_sb->s_dev;
- if (!IS_ERR_OR_NULL(state))
+ if (!IS_ERR_OR_NULL(state)) {
inode = state->inode;
+ __entry->stateid_seq =
+ be32_to_cpu(state->stateid.seqid);
+ __entry->stateid_hash =
+ nfs_stateid_hash(&state->stateid);
+ __entry->openstateid_seq =
+ be32_to_cpu(state->open_stateid.seqid);
+ __entry->openstateid_hash =
+ nfs_stateid_hash(&state->open_stateid);
+ } else {
+ __entry->stateid_seq = 0;
+ __entry->stateid_hash = 0;
+ __entry->openstateid_seq = 0;
+ __entry->openstateid_hash = 0;
+ }
if (inode != NULL) {
__entry->fileid = NFS_FILEID(inode);
__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
@@ -425,7 +444,8 @@ DECLARE_EVENT_CLASS(nfs4_open_event,
TP_printk(
"error=%d (%s) flags=%d (%s) fmode=%s "
"fileid=%02x:%02x:%llu fhandle=0x%08x "
- "name=%02x:%02x:%llu/%s",
+ "name=%02x:%02x:%llu/%s stateid=%d:0x%08x "
+ "openstateid=%d:0x%08x",
__entry->error,
show_nfsv4_errors(__entry->error),
__entry->flags,
@@ -436,7 +456,9 @@ DECLARE_EVENT_CLASS(nfs4_open_event,
__entry->fhandle,
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->dir,
- __get_str(name)
+ __get_str(name),
+ __entry->stateid_seq, __entry->stateid_hash,
+ __entry->openstateid_seq, __entry->openstateid_hash
)
);
@@ -452,6 +474,45 @@ DEFINE_NFS4_OPEN_EVENT(nfs4_open_reclaim);
DEFINE_NFS4_OPEN_EVENT(nfs4_open_expired);
DEFINE_NFS4_OPEN_EVENT(nfs4_open_file);
+TRACE_EVENT(nfs4_cached_open,
+ TP_PROTO(
+ const struct nfs4_state *state
+ ),
+ TP_ARGS(state),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(unsigned int, fmode)
+ __field(int, stateid_seq)
+ __field(u32, stateid_hash)
+ ),
+
+ TP_fast_assign(
+ const struct inode *inode = state->inode;
+
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __entry->fmode = (__force unsigned int)state->state;
+ __entry->stateid_seq =
+ be32_to_cpu(state->stateid.seqid);
+ __entry->stateid_hash =
+ nfs_stateid_hash(&state->stateid);
+ ),
+
+ TP_printk(
+ "fmode=%s fileid=%02x:%02x:%llu "
+ "fhandle=0x%08x stateid=%d:0x%08x",
+ __entry->fmode ? show_fmode_flags(__entry->fmode) :
+ "closed",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ __entry->stateid_seq, __entry->stateid_hash
+ )
+);
+
TRACE_EVENT(nfs4_close,
TP_PROTO(
const struct nfs4_state *state,
@@ -468,6 +529,8 @@ TRACE_EVENT(nfs4_close,
__field(u64, fileid)
__field(unsigned int, fmode)
__field(int, error)
+ __field(int, stateid_seq)
+ __field(u32, stateid_hash)
),
TP_fast_assign(
@@ -478,18 +541,23 @@ TRACE_EVENT(nfs4_close,
__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
__entry->fmode = (__force unsigned int)state->state;
__entry->error = error;
+ __entry->stateid_seq =
+ be32_to_cpu(args->stateid.seqid);
+ __entry->stateid_hash =
+ nfs_stateid_hash(&args->stateid);
),
TP_printk(
"error=%d (%s) fmode=%s fileid=%02x:%02x:%llu "
- "fhandle=0x%08x",
+ "fhandle=0x%08x openstateid=%d:0x%08x",
__entry->error,
show_nfsv4_errors(__entry->error),
__entry->fmode ? show_fmode_flags(__entry->fmode) :
"closed",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
- __entry->fhandle
+ __entry->fhandle,
+ __entry->stateid_seq, __entry->stateid_hash
)
);
@@ -523,6 +591,8 @@ DECLARE_EVENT_CLASS(nfs4_lock_event,
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
+ __field(int, stateid_seq)
+ __field(u32, stateid_hash)
),
TP_fast_assign(
@@ -536,11 +606,16 @@ DECLARE_EVENT_CLASS(nfs4_lock_event,
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode);
__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __entry->stateid_seq =
+ be32_to_cpu(state->stateid.seqid);
+ __entry->stateid_hash =
+ nfs_stateid_hash(&state->stateid);
),
TP_printk(
"error=%d (%s) cmd=%s:%s range=%lld:%lld "
- "fileid=%02x:%02x:%llu fhandle=0x%08x",
+ "fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "stateid=%d:0x%08x",
__entry->error,
show_nfsv4_errors(__entry->error),
show_lock_cmd(__entry->cmd),
@@ -549,7 +624,8 @@ DECLARE_EVENT_CLASS(nfs4_lock_event,
(long long)__entry->end,
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
- __entry->fhandle
+ __entry->fhandle,
+ __entry->stateid_seq, __entry->stateid_hash
)
);
@@ -563,11 +639,73 @@ DECLARE_EVENT_CLASS(nfs4_lock_event,
), \
TP_ARGS(request, state, cmd, error))
DEFINE_NFS4_LOCK_EVENT(nfs4_get_lock);
-DEFINE_NFS4_LOCK_EVENT(nfs4_set_lock);
-DEFINE_NFS4_LOCK_EVENT(nfs4_lock_reclaim);
-DEFINE_NFS4_LOCK_EVENT(nfs4_lock_expired);
DEFINE_NFS4_LOCK_EVENT(nfs4_unlock);
+TRACE_EVENT(nfs4_set_lock,
+ TP_PROTO(
+ const struct file_lock *request,
+ const struct nfs4_state *state,
+ const nfs4_stateid *lockstateid,
+ int cmd,
+ int error
+ ),
+
+ TP_ARGS(request, state, lockstateid, cmd, error),
+
+ TP_STRUCT__entry(
+ __field(int, error)
+ __field(int, cmd)
+ __field(char, type)
+ __field(loff_t, start)
+ __field(loff_t, end)
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(int, stateid_seq)
+ __field(u32, stateid_hash)
+ __field(int, lockstateid_seq)
+ __field(u32, lockstateid_hash)
+ ),
+
+ TP_fast_assign(
+ const struct inode *inode = state->inode;
+
+ __entry->error = error;
+ __entry->cmd = cmd;
+ __entry->type = request->fl_type;
+ __entry->start = request->fl_start;
+ __entry->end = request->fl_end;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __entry->stateid_seq =
+ be32_to_cpu(state->stateid.seqid);
+ __entry->stateid_hash =
+ nfs_stateid_hash(&state->stateid);
+ __entry->lockstateid_seq =
+ be32_to_cpu(lockstateid->seqid);
+ __entry->lockstateid_hash =
+ nfs_stateid_hash(lockstateid);
+ ),
+
+ TP_printk(
+ "error=%d (%s) cmd=%s:%s range=%lld:%lld "
+ "fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "stateid=%d:0x%08x lockstateid=%d:0x%08x",
+ __entry->error,
+ show_nfsv4_errors(__entry->error),
+ show_lock_cmd(__entry->cmd),
+ show_lock_type(__entry->type),
+ (long long)__entry->start,
+ (long long)__entry->end,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ __entry->stateid_seq, __entry->stateid_hash,
+ __entry->lockstateid_seq, __entry->lockstateid_hash
+ )
+);
+
DECLARE_EVENT_CLASS(nfs4_set_delegation_event,
TP_PROTO(
const struct inode *inode,
@@ -621,20 +759,28 @@ TRACE_EVENT(nfs4_delegreturn_exit,
__field(dev_t, dev)
__field(u32, fhandle)
__field(int, error)
+ __field(int, stateid_seq)
+ __field(u32, stateid_hash)
),
TP_fast_assign(
__entry->dev = res->server->s_dev;
__entry->fhandle = nfs_fhandle_hash(args->fhandle);
__entry->error = error;
+ __entry->stateid_seq =
+ be32_to_cpu(args->stateid->seqid);
+ __entry->stateid_hash =
+ nfs_stateid_hash(args->stateid);
),
TP_printk(
- "error=%d (%s) dev=%02x:%02x fhandle=0x%08x",
+ "error=%d (%s) dev=%02x:%02x fhandle=0x%08x "
+ "stateid=%d:0x%08x",
__entry->error,
show_nfsv4_errors(__entry->error),
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->fhandle
+ __entry->fhandle,
+ __entry->stateid_seq, __entry->stateid_hash
)
);
@@ -653,6 +799,8 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event,
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
+ __field(int, stateid_seq)
+ __field(u32, stateid_hash)
),
TP_fast_assign(
@@ -662,15 +810,21 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event,
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode);
__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __entry->stateid_seq =
+ be32_to_cpu(state->stateid.seqid);
+ __entry->stateid_hash =
+ nfs_stateid_hash(&state->stateid);
),
TP_printk(
- "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x",
+ "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "stateid=%d:0x%08x",
__entry->error,
show_nfsv4_errors(__entry->error),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
- __entry->fhandle
+ __entry->fhandle,
+ __entry->stateid_seq, __entry->stateid_hash
)
);
@@ -820,7 +974,6 @@ DECLARE_EVENT_CLASS(nfs4_inode_event,
), \
TP_ARGS(inode, error))
-DEFINE_NFS4_INODE_EVENT(nfs4_setattr);
DEFINE_NFS4_INODE_EVENT(nfs4_access);
DEFINE_NFS4_INODE_EVENT(nfs4_readlink);
DEFINE_NFS4_INODE_EVENT(nfs4_readdir);
@@ -830,8 +983,59 @@ DEFINE_NFS4_INODE_EVENT(nfs4_set_acl);
DEFINE_NFS4_INODE_EVENT(nfs4_get_security_label);
DEFINE_NFS4_INODE_EVENT(nfs4_set_security_label);
#endif /* CONFIG_NFS_V4_SECURITY_LABEL */
-DEFINE_NFS4_INODE_EVENT(nfs4_recall_delegation);
-DEFINE_NFS4_INODE_EVENT(nfs4_delegreturn);
+
+DECLARE_EVENT_CLASS(nfs4_inode_stateid_event,
+ TP_PROTO(
+ const struct inode *inode,
+ const nfs4_stateid *stateid,
+ int error
+ ),
+
+ TP_ARGS(inode, stateid, error),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(int, error)
+ __field(int, stateid_seq)
+ __field(u32, stateid_hash)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __entry->error = error;
+ __entry->stateid_seq =
+ be32_to_cpu(stateid->seqid);
+ __entry->stateid_hash =
+ nfs_stateid_hash(stateid);
+ ),
+
+ TP_printk(
+ "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "stateid=%d:0x%08x",
+ __entry->error,
+ show_nfsv4_errors(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ __entry->stateid_seq, __entry->stateid_hash
+ )
+);
+
+#define DEFINE_NFS4_INODE_STATEID_EVENT(name) \
+ DEFINE_EVENT(nfs4_inode_stateid_event, name, \
+ TP_PROTO( \
+ const struct inode *inode, \
+ const nfs4_stateid *stateid, \
+ int error \
+ ), \
+ TP_ARGS(inode, stateid, error))
+
+DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_setattr);
+DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_delegreturn);
DECLARE_EVENT_CLASS(nfs4_getattr_event,
TP_PROTO(
@@ -941,8 +1145,74 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event,
), \
TP_ARGS(clp, fhandle, inode, error))
DEFINE_NFS4_INODE_CALLBACK_EVENT(nfs4_cb_getattr);
-DEFINE_NFS4_INODE_CALLBACK_EVENT(nfs4_cb_layoutrecall_inode);
+DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event,
+ TP_PROTO(
+ const struct nfs_client *clp,
+ const struct nfs_fh *fhandle,
+ const struct inode *inode,
+ const nfs4_stateid *stateid,
+ int error
+ ),
+
+ TP_ARGS(clp, fhandle, inode, stateid, error),
+
+ TP_STRUCT__entry(
+ __field(int, error)
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __string(dstaddr, clp ?
+ rpc_peeraddr2str(clp->cl_rpcclient,
+ RPC_DISPLAY_ADDR) : "unknown")
+ __field(int, stateid_seq)
+ __field(u32, stateid_hash)
+ ),
+
+ TP_fast_assign(
+ __entry->error = error;
+ __entry->fhandle = nfs_fhandle_hash(fhandle);
+ if (inode != NULL) {
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->dev = inode->i_sb->s_dev;
+ } else {
+ __entry->fileid = 0;
+ __entry->dev = 0;
+ }
+ __assign_str(dstaddr, clp ?
+ rpc_peeraddr2str(clp->cl_rpcclient,
+ RPC_DISPLAY_ADDR) : "unknown")
+ __entry->stateid_seq =
+ be32_to_cpu(stateid->seqid);
+ __entry->stateid_hash =
+ nfs_stateid_hash(stateid);
+ ),
+
+ TP_printk(
+ "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "stateid=%d:0x%08x dstaddr=%s",
+ __entry->error,
+ show_nfsv4_errors(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ __entry->stateid_seq, __entry->stateid_hash,
+ __get_str(dstaddr)
+ )
+);
+
+#define DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(name) \
+ DEFINE_EVENT(nfs4_inode_stateid_callback_event, name, \
+ TP_PROTO( \
+ const struct nfs_client *clp, \
+ const struct nfs_fh *fhandle, \
+ const struct inode *inode, \
+ const nfs4_stateid *stateid, \
+ int error \
+ ), \
+ TP_ARGS(clp, fhandle, inode, stateid, error))
+DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_recall);
+DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_layoutrecall_file);
DECLARE_EVENT_CLASS(nfs4_idmap_event,
TP_PROTO(
@@ -1005,28 +1275,37 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
__field(loff_t, offset)
__field(size_t, count)
__field(int, error)
+ __field(int, stateid_seq)
+ __field(u32, stateid_hash)
),
TP_fast_assign(
const struct inode *inode = hdr->inode;
+ const struct nfs4_state *state =
+ hdr->args.context->state;
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode);
__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
__entry->offset = hdr->args.offset;
__entry->count = hdr->args.count;
__entry->error = error;
+ __entry->stateid_seq =
+ be32_to_cpu(state->stateid.seqid);
+ __entry->stateid_hash =
+ nfs_stateid_hash(&state->stateid);
),
TP_printk(
"error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld count=%zu",
+ "offset=%lld count=%zu stateid=%d:0x%08x",
__entry->error,
show_nfsv4_errors(__entry->error),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
(long long)__entry->offset,
- __entry->count
+ __entry->count,
+ __entry->stateid_seq, __entry->stateid_hash
)
);
#define DEFINE_NFS4_READ_EVENT(name) \
@@ -1056,28 +1335,37 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
__field(loff_t, offset)
__field(size_t, count)
__field(int, error)
+ __field(int, stateid_seq)
+ __field(u32, stateid_hash)
),
TP_fast_assign(
const struct inode *inode = hdr->inode;
+ const struct nfs4_state *state =
+ hdr->args.context->state;
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode);
__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
__entry->offset = hdr->args.offset;
__entry->count = hdr->args.count;
__entry->error = error;
+ __entry->stateid_seq =
+ be32_to_cpu(state->stateid.seqid);
+ __entry->stateid_hash =
+ nfs_stateid_hash(&state->stateid);
),
TP_printk(
"error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld count=%zu",
+ "offset=%lld count=%zu stateid=%d:0x%08x",
__entry->error,
show_nfsv4_errors(__entry->error),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
(long long)__entry->offset,
- __entry->count
+ __entry->count,
+ __entry->stateid_seq, __entry->stateid_hash
)
);
@@ -1154,10 +1442,11 @@ TRACE_EVENT(nfs4_layoutget,
const struct nfs_open_context *ctx,
const struct pnfs_layout_range *args,
const struct pnfs_layout_range *res,
+ const nfs4_stateid *layout_stateid,
int error
),
- TP_ARGS(ctx, args, res, error),
+ TP_ARGS(ctx, args, res, layout_stateid, error),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -1167,10 +1456,15 @@ TRACE_EVENT(nfs4_layoutget,
__field(u64, offset)
__field(u64, count)
__field(int, error)
+ __field(int, stateid_seq)
+ __field(u32, stateid_hash)
+ __field(int, layoutstateid_seq)
+ __field(u32, layoutstateid_hash)
),
TP_fast_assign(
const struct inode *inode = d_inode(ctx->dentry);
+ const struct nfs4_state *state = ctx->state;
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode);
__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
@@ -1178,11 +1472,25 @@ TRACE_EVENT(nfs4_layoutget,
__entry->offset = args->offset;
__entry->count = args->length;
__entry->error = error;
+ __entry->stateid_seq =
+ be32_to_cpu(state->stateid.seqid);
+ __entry->stateid_hash =
+ nfs_stateid_hash(&state->stateid);
+ if (!error) {
+ __entry->layoutstateid_seq =
+ be32_to_cpu(layout_stateid->seqid);
+ __entry->layoutstateid_hash =
+ nfs_stateid_hash(layout_stateid);
+ } else {
+ __entry->layoutstateid_seq = 0;
+ __entry->layoutstateid_hash = 0;
+ }
),
TP_printk(
"error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
- "iomode=%s offset=%llu count=%llu",
+ "iomode=%s offset=%llu count=%llu stateid=%d:0x%08x "
+ "layoutstateid=%d:0x%08x",
__entry->error,
show_nfsv4_errors(__entry->error),
MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -1190,14 +1498,83 @@ TRACE_EVENT(nfs4_layoutget,
__entry->fhandle,
show_pnfs_iomode(__entry->iomode),
(unsigned long long)__entry->offset,
- (unsigned long long)__entry->count
+ (unsigned long long)__entry->count,
+ __entry->stateid_seq, __entry->stateid_hash,
+ __entry->layoutstateid_seq, __entry->layoutstateid_hash
)
);
-DEFINE_NFS4_INODE_EVENT(nfs4_layoutcommit);
-DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn);
+DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutcommit);
+DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutreturn);
DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn_on_close);
+#define show_pnfs_update_layout_reason(reason) \
+ __print_symbolic(reason, \
+ { PNFS_UPDATE_LAYOUT_UNKNOWN, "unknown" }, \
+ { PNFS_UPDATE_LAYOUT_NO_PNFS, "no pnfs" }, \
+ { PNFS_UPDATE_LAYOUT_RD_ZEROLEN, "read+zerolen" }, \
+ { PNFS_UPDATE_LAYOUT_MDSTHRESH, "mdsthresh" }, \
+ { PNFS_UPDATE_LAYOUT_NOMEM, "nomem" }, \
+ { PNFS_UPDATE_LAYOUT_BULK_RECALL, "bulk recall" }, \
+ { PNFS_UPDATE_LAYOUT_IO_TEST_FAIL, "io test fail" }, \
+ { PNFS_UPDATE_LAYOUT_FOUND_CACHED, "found cached" }, \
+ { PNFS_UPDATE_LAYOUT_RETURN, "layoutreturn" }, \
+ { PNFS_UPDATE_LAYOUT_BLOCKED, "layouts blocked" }, \
+ { PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, "sent layoutget" })
+
+TRACE_EVENT(pnfs_update_layout,
+ TP_PROTO(struct inode *inode,
+ loff_t pos,
+ u64 count,
+ enum pnfs_iomode iomode,
+ struct pnfs_layout_hdr *lo,
+ enum pnfs_update_layout_reason reason
+ ),
+ TP_ARGS(inode, pos, count, iomode, lo, reason),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u64, fileid)
+ __field(u32, fhandle)
+ __field(loff_t, pos)
+ __field(u64, count)
+ __field(enum pnfs_iomode, iomode)
+ __field(int, layoutstateid_seq)
+ __field(u32, layoutstateid_hash)
+ __field(enum pnfs_update_layout_reason, reason)
+ ),
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __entry->pos = pos;
+ __entry->count = count;
+ __entry->iomode = iomode;
+ __entry->reason = reason;
+ if (lo != NULL) {
+ __entry->layoutstateid_seq =
+ be32_to_cpu(lo->plh_stateid.seqid);
+ __entry->layoutstateid_hash =
+ nfs_stateid_hash(&lo->plh_stateid);
+ } else {
+ __entry->layoutstateid_seq = 0;
+ __entry->layoutstateid_hash = 0;
+ }
+ ),
+ TP_printk(
+ "fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "iomode=%s pos=%llu count=%llu "
+ "layoutstateid=%d:0x%08x (%s)",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ show_pnfs_iomode(__entry->iomode),
+ (unsigned long long)__entry->pos,
+ (unsigned long long)__entry->count,
+ __entry->layoutstateid_seq, __entry->layoutstateid_hash,
+ show_pnfs_update_layout_reason(__entry->reason)
+ )
+);
+
#endif /* CONFIG_NFS_V4_1 */
#endif /* _TRACE_NFS4_H */
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 59f838cdc..9f80a086b 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -39,7 +39,6 @@
{ 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \
{ 1 << NFS_INO_FLUSHING, "FLUSHING" }, \
{ 1 << NFS_INO_FSCACHE, "FSCACHE" }, \
- { 1 << NFS_INO_COMMIT, "COMMIT" }, \
{ 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \
{ 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" })
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 452a011ba..8ce4f61cb 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -101,53 +101,18 @@ nfs_page_free(struct nfs_page *p)
kmem_cache_free(nfs_page_cachep, p);
}
-static void
-nfs_iocounter_inc(struct nfs_io_counter *c)
-{
- atomic_inc(&c->io_count);
-}
-
-static void
-nfs_iocounter_dec(struct nfs_io_counter *c)
-{
- if (atomic_dec_and_test(&c->io_count)) {
- clear_bit(NFS_IO_INPROGRESS, &c->flags);
- smp_mb__after_atomic();
- wake_up_bit(&c->flags, NFS_IO_INPROGRESS);
- }
-}
-
-static int
-__nfs_iocounter_wait(struct nfs_io_counter *c)
-{
- wait_queue_head_t *wq = bit_waitqueue(&c->flags, NFS_IO_INPROGRESS);
- DEFINE_WAIT_BIT(q, &c->flags, NFS_IO_INPROGRESS);
- int ret = 0;
-
- do {
- prepare_to_wait(wq, &q.wait, TASK_KILLABLE);
- set_bit(NFS_IO_INPROGRESS, &c->flags);
- if (atomic_read(&c->io_count) == 0)
- break;
- ret = nfs_wait_bit_killable(&q.key, TASK_KILLABLE);
- } while (atomic_read(&c->io_count) != 0 && !ret);
- finish_wait(wq, &q.wait);
- return ret;
-}
-
/**
* nfs_iocounter_wait - wait for i/o to complete
- * @c: nfs_io_counter to use
+ * @l_ctx: nfs_lock_context with io_counter to use
*
* returns -ERESTARTSYS if interrupted by a fatal signal.
* Otherwise returns 0 once the io_count hits 0.
*/
int
-nfs_iocounter_wait(struct nfs_io_counter *c)
+nfs_iocounter_wait(struct nfs_lock_context *l_ctx)
{
- if (atomic_read(&c->io_count) == 0)
- return 0;
- return __nfs_iocounter_wait(c);
+ return wait_on_atomic_t(&l_ctx->io_count, nfs_wait_atomic_killable,
+ TASK_KILLABLE);
}
/*
@@ -370,7 +335,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page,
return ERR_CAST(l_ctx);
}
req->wb_lock_context = l_ctx;
- nfs_iocounter_inc(&l_ctx->io_count);
+ atomic_inc(&l_ctx->io_count);
/* Initialize the request struct. Initially, we assume a
* long write-back delay. This will be adjusted in
@@ -431,7 +396,8 @@ static void nfs_clear_request(struct nfs_page *req)
req->wb_page = NULL;
}
if (l_ctx != NULL) {
- nfs_iocounter_dec(&l_ctx->io_count);
+ if (atomic_dec_and_test(&l_ctx->io_count))
+ wake_up_atomic_t(&l_ctx->io_count);
nfs_put_lock_context(l_ctx);
req->wb_lock_context = NULL;
}
@@ -664,22 +630,11 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio);
* @desc: IO descriptor
* @hdr: pageio header
*/
-static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
+static void nfs_pgio_error(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_mirror *mirror;
- u32 midx;
-
set_bit(NFS_IOHDR_REDO, &hdr->flags);
nfs_pgio_data_destroy(hdr);
hdr->completion_ops->completion(hdr);
- /* TODO: Make sure it's right to clean up all mirrors here
- * and not just hdr->pgio_mirror_idx */
- for (midx = 0; midx < desc->pg_mirror_count; midx++) {
- mirror = &desc->pg_mirrors[midx];
- desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
- }
- return -ENOMEM;
}
/**
@@ -800,8 +755,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
unsigned int pagecount, pageused;
pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count);
- if (!nfs_pgarray_set(&hdr->page_array, pagecount))
- return nfs_pgio_error(desc, hdr);
+ if (!nfs_pgarray_set(&hdr->page_array, pagecount)) {
+ nfs_pgio_error(hdr);
+ desc->pg_error = -ENOMEM;
+ return desc->pg_error;
+ }
nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
pages = hdr->page_array.pagevec;
@@ -819,8 +777,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
*pages++ = last_page = req->wb_page;
}
}
- if (WARN_ON_ONCE(pageused != pagecount))
- return nfs_pgio_error(desc, hdr);
+ if (WARN_ON_ONCE(pageused != pagecount)) {
+ nfs_pgio_error(hdr);
+ desc->pg_error = -EINVAL;
+ return desc->pg_error;
+ }
if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
(desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
@@ -835,18 +796,13 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio);
static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
{
- struct nfs_pgio_mirror *mirror;
struct nfs_pgio_header *hdr;
int ret;
- mirror = nfs_pgio_current_mirror(desc);
-
hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
if (!hdr) {
- /* TODO: make sure this is right with mirroring - or
- * should it back out all mirrors? */
- desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
- return -ENOMEM;
+ desc->pg_error = -ENOMEM;
+ return desc->pg_error;
}
nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
ret = nfs_generic_pgio(desc, hdr);
@@ -874,6 +830,9 @@ static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio,
mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req);
+ if (pgio->pg_error < 0)
+ return pgio->pg_error;
+
if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX)
return -EINVAL;
@@ -903,12 +862,6 @@ static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio)
pgio->pg_mirrors_dynamic = NULL;
}
-static bool nfs_match_open_context(const struct nfs_open_context *ctx1,
- const struct nfs_open_context *ctx2)
-{
- return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state;
-}
-
static bool nfs_match_lock_context(const struct nfs_lock_context *l1,
const struct nfs_lock_context *l2)
{
@@ -982,6 +935,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
} else {
if (desc->pg_ops->pg_init)
desc->pg_ops->pg_init(desc, req);
+ if (desc->pg_error < 0)
+ return 0;
mirror->pg_base = req->wb_pgbase;
}
if (!nfs_can_coalesce_requests(prev, req, desc))
@@ -1147,6 +1102,8 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
bytes = req->wb_bytes;
nfs_pageio_setup_mirroring(desc, req);
+ if (desc->pg_error < 0)
+ goto out_failed;
for (midx = 0; midx < desc->pg_mirror_count; midx++) {
if (midx) {
@@ -1163,7 +1120,8 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
if (IS_ERR(dupreq)) {
nfs_page_group_unlock(req);
- return 0;
+ desc->pg_error = PTR_ERR(dupreq);
+ goto out_failed;
}
nfs_lock_request(dupreq);
@@ -1176,10 +1134,32 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
if (nfs_pgio_has_mirroring(desc))
desc->pg_mirror_idx = midx;
if (!nfs_pageio_add_request_mirror(desc, dupreq))
- return 0;
+ goto out_failed;
}
return 1;
+
+out_failed:
+ /*
+ * We might have failed before sending any reqs over wire.
+ * Clean up rest of the reqs in mirror pg_list.
+ */
+ if (desc->pg_error) {
+ struct nfs_pgio_mirror *mirror;
+ void (*func)(struct list_head *);
+
+ /* remember fatal errors */
+ if (nfs_error_is_fatal(desc->pg_error))
+ mapping_set_error(desc->pg_inode->i_mapping,
+ desc->pg_error);
+
+ func = desc->pg_completion_ops->error_cleanup;
+ for (midx = 0; midx < desc->pg_mirror_count; midx++) {
+ mirror = &desc->pg_mirrors[midx];
+ func(&mirror->pg_list);
+ }
+ }
+ return 0;
}
/*
@@ -1232,7 +1212,7 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
nfs_pageio_complete(desc);
if (!list_empty(&failed)) {
list_move(&failed, &hdr->pages);
- return -EIO;
+ return desc->pg_error < 0 ? desc->pg_error : -EIO;
}
return 0;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index bec038449..2fa483e6d 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -52,9 +52,7 @@ static DEFINE_SPINLOCK(pnfs_spinlock);
*/
static LIST_HEAD(pnfs_modules_tbl);
-static int
-pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid,
- enum pnfs_iomode iomode, bool sync);
+static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo);
/* Return the registered pnfs layout driver module matching given id */
static struct pnfs_layoutdriver_type *
@@ -243,6 +241,8 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
{
struct inode *inode = lo->plh_inode;
+ pnfs_layoutreturn_before_put_layout_hdr(lo);
+
if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
if (!list_empty(&lo->plh_segs))
WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
@@ -252,6 +252,27 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
}
}
+/*
+ * Mark a pnfs_layout_hdr and all associated layout segments as invalid
+ *
+ * In order to continue using the pnfs_layout_hdr, a full recovery
+ * is required.
+ * Note that caller must hold inode->i_lock.
+ */
+static int
+pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
+ struct list_head *lseg_list)
+{
+ struct pnfs_layout_range range = {
+ .iomode = IOMODE_ANY,
+ .offset = 0,
+ .length = NFS4_MAX_UINT64,
+ };
+
+ set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+ return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range);
+}
+
static int
pnfs_iomode_to_fail_bit(u32 iomode)
{
@@ -345,58 +366,6 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
}
-/* Return true if layoutreturn is needed */
-static bool
-pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
- struct pnfs_layout_segment *lseg)
-{
- struct pnfs_layout_segment *s;
-
- if (!test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
- return false;
-
- list_for_each_entry(s, &lo->plh_segs, pls_list)
- if (s != lseg && test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
- return false;
-
- return true;
-}
-
-static bool
-pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
-{
- if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
- return false;
- lo->plh_return_iomode = 0;
- pnfs_get_layout_hdr(lo);
- clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags);
- return true;
-}
-
-static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
- struct pnfs_layout_hdr *lo, struct inode *inode)
-{
- lo = lseg->pls_layout;
- inode = lo->plh_inode;
-
- spin_lock(&inode->i_lock);
- if (pnfs_layout_need_return(lo, lseg)) {
- nfs4_stateid stateid;
- enum pnfs_iomode iomode;
- bool send;
-
- stateid = lo->plh_stateid;
- iomode = lo->plh_return_iomode;
- send = pnfs_prepare_layoutreturn(lo);
- spin_unlock(&inode->i_lock);
- if (send) {
- /* Send an async layoutreturn so we dont deadlock */
- pnfs_send_layoutreturn(lo, stateid, iomode, false);
- }
- } else
- spin_unlock(&inode->i_lock);
-}
-
void
pnfs_put_lseg(struct pnfs_layout_segment *lseg)
{
@@ -410,15 +379,8 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
atomic_read(&lseg->pls_refcount),
test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
- /* Handle the case where refcount != 1 */
- if (atomic_add_unless(&lseg->pls_refcount, -1, 1))
- return;
-
lo = lseg->pls_layout;
inode = lo->plh_inode;
- /* Do we need a layoutreturn? */
- if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
- pnfs_layoutreturn_before_put_lseg(lseg, lo, inode);
if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
@@ -566,10 +528,10 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
int
pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
- struct pnfs_layout_range *recall_range)
+ const struct pnfs_layout_range *recall_range)
{
struct pnfs_layout_segment *lseg, *next;
- int invalid = 0, removed = 0;
+ int remaining = 0;
dprintk("%s:Begin lo %p\n", __func__, lo);
@@ -582,11 +544,11 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
"offset %llu length %llu\n", __func__,
lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
lseg->pls_range.length);
- invalid++;
- removed += mark_lseg_invalid(lseg, tmp_list);
+ if (!mark_lseg_invalid(lseg, tmp_list))
+ remaining++;
}
- dprintk("%s:Return %i\n", __func__, invalid - removed);
- return invalid - removed;
+ dprintk("%s:Return %i\n", __func__, remaining);
+ return remaining;
}
/* note free_me must contain lsegs from a single layout_hdr */
@@ -613,12 +575,10 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
spin_lock(&nfsi->vfs_inode.i_lock);
lo = nfsi->layout;
if (lo) {
- lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
- pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
pnfs_get_layout_hdr(lo);
+ pnfs_mark_layout_stateid_invalid(lo, &tmp_list);
pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
- pnfs_clear_retry_layoutget(lo);
spin_unlock(&nfsi->vfs_inode.i_lock);
pnfs_free_lseg_list(&tmp_list);
pnfs_put_layout_hdr(lo);
@@ -677,11 +637,6 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
{
struct pnfs_layout_hdr *lo;
struct inode *inode;
- struct pnfs_layout_range range = {
- .iomode = IOMODE_ANY,
- .offset = 0,
- .length = NFS4_MAX_UINT64,
- };
LIST_HEAD(lseg_list);
int ret = 0;
@@ -696,13 +651,15 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
spin_lock(&inode->i_lock);
list_del_init(&lo->plh_bulk_destroy);
- lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
- if (is_bulk_recall)
- set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
- if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range))
+ if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
+ if (is_bulk_recall)
+ set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
ret = -EAGAIN;
+ }
spin_unlock(&inode->i_lock);
pnfs_free_lseg_list(&lseg_list);
+ /* Free all lsegs that are attached to commit buckets */
+ nfs_commit_inode(inode, 0);
pnfs_put_layout_hdr(lo);
iput(inode);
}
@@ -826,7 +783,7 @@ pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo)
int
pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
- struct pnfs_layout_range *range,
+ const struct pnfs_layout_range *range,
struct nfs4_state *open_state)
{
int status = 0;
@@ -861,7 +818,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
static struct pnfs_layout_segment *
send_layoutget(struct pnfs_layout_hdr *lo,
struct nfs_open_context *ctx,
- struct pnfs_layout_range *range,
+ const struct pnfs_layout_range *range,
gfp_t gfp_flags)
{
struct inode *ino = lo->plh_inode;
@@ -894,7 +851,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
lgp->args.minlength = i_size - range->offset;
}
lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
- lgp->args.range = *range;
+ pnfs_copy_range(&lgp->args.range, range);
lgp->args.type = server->pnfs_curr_ld->id;
lgp->args.inode = ino;
lgp->args.ctx = get_nfs_open_context(ctx);
@@ -904,17 +861,9 @@ send_layoutget(struct pnfs_layout_hdr *lo,
lseg = nfs4_proc_layoutget(lgp, gfp_flags);
} while (lseg == ERR_PTR(-EAGAIN));
- if (IS_ERR(lseg)) {
- switch (PTR_ERR(lseg)) {
- case -ENOMEM:
- case -ERESTARTSYS:
- break;
- default:
- /* remember that LAYOUTGET failed and suspend trying */
- pnfs_layout_io_set_failed(lo, range->iomode);
- }
- return NULL;
- } else
+ if (IS_ERR(lseg) && !nfs_error_is_fatal(PTR_ERR(lseg)))
+ lseg = NULL;
+ else
pnfs_layout_clear_fail_bit(lo,
pnfs_iomode_to_fail_bit(range->iomode));
@@ -944,8 +893,19 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
}
+static bool
+pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
+{
+ if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+ return false;
+ lo->plh_return_iomode = 0;
+ pnfs_get_layout_hdr(lo);
+ clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
+ return true;
+}
+
static int
-pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid,
+pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
enum pnfs_iomode iomode, bool sync)
{
struct inode *ino = lo->plh_inode;
@@ -962,7 +922,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid,
goto out;
}
- lrp->args.stateid = stateid;
+ nfs4_stateid_copy(&lrp->args.stateid, stateid);
lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
lrp->args.inode = ino;
lrp->args.range.iomode = iomode;
@@ -978,6 +938,48 @@ out:
return status;
}
+/* Return true if layoutreturn is needed */
+static bool
+pnfs_layout_need_return(struct pnfs_layout_hdr *lo)
+{
+ struct pnfs_layout_segment *s;
+
+ if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
+ return false;
+
+ /* Defer layoutreturn until all lsegs are done */
+ list_for_each_entry(s, &lo->plh_segs, pls_list) {
+ if (test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
+ return false;
+ }
+
+ return true;
+}
+
+static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+ struct inode *inode= lo->plh_inode;
+
+ if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
+ return;
+ spin_lock(&inode->i_lock);
+ if (pnfs_layout_need_return(lo)) {
+ nfs4_stateid stateid;
+ enum pnfs_iomode iomode;
+ bool send;
+
+ nfs4_stateid_copy(&stateid, &lo->plh_stateid);
+ iomode = lo->plh_return_iomode;
+ send = pnfs_prepare_layoutreturn(lo);
+ spin_unlock(&inode->i_lock);
+ if (send) {
+ /* Send an async layoutreturn so we dont deadlock */
+ pnfs_send_layoutreturn(lo, &stateid, iomode, false);
+ }
+ } else
+ spin_unlock(&inode->i_lock);
+}
+
/*
* Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
* when the layout segment list is empty.
@@ -1005,7 +1007,7 @@ _pnfs_return_layout(struct inode *ino)
dprintk("NFS: %s no layout to return\n", __func__);
goto out;
}
- stateid = nfsi->layout->plh_stateid;
+ nfs4_stateid_copy(&stateid, &nfsi->layout->plh_stateid);
/* Reference matched in nfs4_layoutreturn_release */
pnfs_get_layout_hdr(lo);
empty = list_empty(&lo->plh_segs);
@@ -1033,7 +1035,7 @@ _pnfs_return_layout(struct inode *ino)
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&tmp_list);
if (send)
- status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
+ status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true);
out_put_layout_hdr:
pnfs_put_layout_hdr(lo);
out:
@@ -1096,13 +1098,12 @@ bool pnfs_roc(struct inode *ino)
goto out_noroc;
}
- stateid = lo->plh_stateid;
+ nfs4_stateid_copy(&stateid, &lo->plh_stateid);
/* always send layoutreturn if being marked so */
- if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+ if (test_and_clear_bit(NFS_LAYOUT_RETURN_REQUESTED,
&lo->plh_flags))
layoutreturn = pnfs_prepare_layoutreturn(lo);
- pnfs_clear_retry_layoutget(lo);
list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
/* If we are sending layoutreturn, invalidate all valid lsegs */
if (layoutreturn || test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
@@ -1124,7 +1125,7 @@ out_noroc:
pnfs_free_lseg_list(&tmp_list);
pnfs_layoutcommit_inode(ino, true);
if (layoutreturn)
- pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
+ pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true);
return roc;
}
@@ -1149,6 +1150,7 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
spin_lock(&ino->i_lock);
lo = NFS_I(ino)->layout;
+ pnfs_mark_layout_returned_if_empty(lo);
if (pnfs_seqid_is_newer(barrier, lo->plh_barrier))
lo->plh_barrier = barrier;
spin_unlock(&ino->i_lock);
@@ -1465,25 +1467,15 @@ static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
return ret;
}
-/* stop waiting if someone clears NFS_LAYOUT_RETRY_LAYOUTGET bit. */
-static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key, int mode)
-{
- if (!test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, key->flags))
- return 1;
- return nfs_wait_bit_killable(key, mode);
-}
-
static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
{
- if (!pnfs_should_retry_layoutget(lo))
- return false;
/*
* send layoutcommit as it can hold up layoutreturn due to lseg
* reference
*/
pnfs_layoutcommit_inode(lo->plh_inode, false);
return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN,
- pnfs_layoutget_retry_bit_wait,
+ nfs_wait_bit_killable,
TASK_UNINTERRUPTIBLE);
}
@@ -1520,14 +1512,23 @@ pnfs_update_layout(struct inode *ino,
struct pnfs_layout_segment *lseg = NULL;
bool first;
- if (!pnfs_enabled_sb(NFS_SERVER(ino)))
+ if (!pnfs_enabled_sb(NFS_SERVER(ino))) {
+ trace_pnfs_update_layout(ino, pos, count, iomode, NULL,
+ PNFS_UPDATE_LAYOUT_NO_PNFS);
goto out;
+ }
- if (iomode == IOMODE_READ && i_size_read(ino) == 0)
+ if (iomode == IOMODE_READ && i_size_read(ino) == 0) {
+ trace_pnfs_update_layout(ino, pos, count, iomode, NULL,
+ PNFS_UPDATE_LAYOUT_RD_ZEROLEN);
goto out;
+ }
- if (pnfs_within_mdsthreshold(ctx, ino, iomode))
+ if (pnfs_within_mdsthreshold(ctx, ino, iomode)) {
+ trace_pnfs_update_layout(ino, pos, count, iomode, NULL,
+ PNFS_UPDATE_LAYOUT_MDSTHRESH);
goto out;
+ }
lookup_again:
first = false;
@@ -1535,19 +1536,25 @@ lookup_again:
lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
if (lo == NULL) {
spin_unlock(&ino->i_lock);
+ trace_pnfs_update_layout(ino, pos, count, iomode, NULL,
+ PNFS_UPDATE_LAYOUT_NOMEM);
goto out;
}
/* Do we even need to bother with this? */
if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+ trace_pnfs_update_layout(ino, pos, count, iomode, lo,
+ PNFS_UPDATE_LAYOUT_BULK_RECALL);
dprintk("%s matches recall, use MDS\n", __func__);
goto out_unlock;
}
/* if LAYOUTGET already failed once we don't try again */
- if (pnfs_layout_io_test_failed(lo, iomode) &&
- !pnfs_should_retry_layoutget(lo))
+ if (pnfs_layout_io_test_failed(lo, iomode)) {
+ trace_pnfs_update_layout(ino, pos, count, iomode, lo,
+ PNFS_UPDATE_LAYOUT_IO_TEST_FAIL);
goto out_unlock;
+ }
first = list_empty(&lo->plh_segs);
if (first) {
@@ -1567,8 +1574,11 @@ lookup_again:
* already exists
*/
lseg = pnfs_find_lseg(lo, &arg);
- if (lseg)
+ if (lseg) {
+ trace_pnfs_update_layout(ino, pos, count, iomode, lo,
+ PNFS_UPDATE_LAYOUT_FOUND_CACHED);
goto out_unlock;
+ }
}
/*
@@ -1585,11 +1595,16 @@ lookup_again:
dprintk("%s retrying\n", __func__);
goto lookup_again;
}
+ trace_pnfs_update_layout(ino, pos, count, iomode, lo,
+ PNFS_UPDATE_LAYOUT_RETURN);
goto out_put_layout_hdr;
}
- if (pnfs_layoutgets_blocked(lo))
+ if (pnfs_layoutgets_blocked(lo)) {
+ trace_pnfs_update_layout(ino, pos, count, iomode, lo,
+ PNFS_UPDATE_LAYOUT_BLOCKED);
goto out_unlock;
+ }
atomic_inc(&lo->plh_outstanding);
spin_unlock(&ino->i_lock);
@@ -1612,8 +1627,9 @@ lookup_again:
arg.length = PAGE_CACHE_ALIGN(arg.length);
lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
- pnfs_clear_retry_layoutget(lo);
atomic_dec(&lo->plh_outstanding);
+ trace_pnfs_update_layout(ino, pos, count, iomode, lo,
+ PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
out_put_layout_hdr:
if (first)
pnfs_clear_first_layoutget(lo);
@@ -1623,7 +1639,7 @@ out:
"(%s, offset: %llu, length: %llu)\n",
__func__, ino->i_sb->s_id,
(unsigned long long)NFS_FILEID(ino),
- lseg == NULL ? "not found" : "found",
+ IS_ERR_OR_NULL(lseg) ? "not found" : "found",
iomode==IOMODE_RW ? "read/write" : "read-only",
(unsigned long long)pos,
(unsigned long long)count);
@@ -1730,16 +1746,40 @@ out_forget_reply:
}
static void
+pnfs_set_plh_return_iomode(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode)
+{
+ if (lo->plh_return_iomode == iomode)
+ return;
+ if (lo->plh_return_iomode != 0)
+ iomode = IOMODE_ANY;
+ lo->plh_return_iomode = iomode;
+ set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
+}
+
+/**
+ * pnfs_mark_matching_lsegs_return - Free or return matching layout segments
+ * @lo: pointer to layout header
+ * @tmp_list: list header to be used with pnfs_free_lseg_list()
+ * @return_range: describe layout segment ranges to be returned
+ *
+ * This function is mainly intended for use by layoutrecall. It attempts
+ * to free the layout segment immediately, or else to mark it for return
+ * as soon as its reference count drops to zero.
+ */
+int
pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
- struct pnfs_layout_range *return_range)
+ const struct pnfs_layout_range *return_range)
{
struct pnfs_layout_segment *lseg, *next;
+ int remaining = 0;
dprintk("%s:Begin lo %p\n", __func__, lo);
if (list_empty(&lo->plh_segs))
- return;
+ return 0;
+
+ assert_spin_locked(&lo->plh_inode->i_lock);
list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
if (should_free_lseg(&lseg->pls_range, return_range)) {
@@ -1748,39 +1788,47 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
lseg, lseg->pls_range.iomode,
lseg->pls_range.offset,
lseg->pls_range.length);
+ if (mark_lseg_invalid(lseg, tmp_list))
+ continue;
+ remaining++;
set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
- mark_lseg_invalid(lseg, tmp_list);
- set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
- &lo->plh_flags);
+ pnfs_set_plh_return_iomode(lo, return_range->iomode);
}
+ return remaining;
}
void pnfs_error_mark_layout_for_return(struct inode *inode,
struct pnfs_layout_segment *lseg)
{
struct pnfs_layout_hdr *lo = NFS_I(inode)->layout;
- int iomode = pnfs_iomode_to_fail_bit(lseg->pls_range.iomode);
struct pnfs_layout_range range = {
.iomode = lseg->pls_range.iomode,
.offset = 0,
.length = NFS4_MAX_UINT64,
};
LIST_HEAD(free_me);
+ bool return_now = false;
spin_lock(&inode->i_lock);
- /* set failure bit so that pnfs path will be retried later */
- pnfs_layout_set_fail_bit(lo, iomode);
- if (lo->plh_return_iomode == 0)
- lo->plh_return_iomode = range.iomode;
- else if (lo->plh_return_iomode != range.iomode)
- lo->plh_return_iomode = IOMODE_ANY;
+ pnfs_set_plh_return_iomode(lo, range.iomode);
/*
* mark all matching lsegs so that we are sure to have no live
* segments at hand when sending layoutreturn. See pnfs_put_lseg()
* for how it works.
*/
- pnfs_mark_matching_lsegs_return(lo, &free_me, &range);
- spin_unlock(&inode->i_lock);
+ if (!pnfs_mark_matching_lsegs_return(lo, &free_me, &range)) {
+ nfs4_stateid stateid;
+ enum pnfs_iomode iomode = lo->plh_return_iomode;
+
+ nfs4_stateid_copy(&stateid, &lo->plh_stateid);
+ return_now = pnfs_prepare_layoutreturn(lo);
+ spin_unlock(&inode->i_lock);
+ if (return_now)
+ pnfs_send_layoutreturn(lo, &stateid, iomode, false);
+ } else {
+ spin_unlock(&inode->i_lock);
+ nfs_commit_inode(inode, 0);
+ }
pnfs_free_lseg_list(&free_me);
}
EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);
@@ -1802,6 +1850,11 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
rd_size,
IOMODE_READ,
GFP_KERNEL);
+ if (IS_ERR(pgio->pg_lseg)) {
+ pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ return;
+ }
}
/* If no lseg, fall back to read through mds */
if (pgio->pg_lseg == NULL)
@@ -1814,13 +1867,19 @@ void
pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req, u64 wb_size)
{
- if (pgio->pg_lseg == NULL)
+ if (pgio->pg_lseg == NULL) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
req_offset(req),
wb_size,
IOMODE_RW,
GFP_NOFS);
+ if (IS_ERR(pgio->pg_lseg)) {
+ pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ return;
+ }
+ }
/* If no lseg, fall back to write through mds */
if (pgio->pg_lseg == NULL)
nfs_pageio_reset_write_mds(pgio);
@@ -1988,15 +2047,13 @@ static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
int
pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
- struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
-
struct nfs_pgio_header *hdr;
int ret;
hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
if (!hdr) {
- desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
- return -ENOMEM;
+ desc->pg_error = -ENOMEM;
+ return desc->pg_error;
}
nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
@@ -2119,15 +2176,13 @@ static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
int
pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
- struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
-
struct nfs_pgio_header *hdr;
int ret;
hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
if (!hdr) {
- desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
- return -ENOMEM;
+ desc->pg_error = -ENOMEM;
+ return desc->pg_error;
}
nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index d1990e90e..1ac1db5f6 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -94,11 +94,10 @@ enum {
NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */
NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */
- NFS_LAYOUT_RETURN, /* Return this layout ASAP */
- NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */
+ NFS_LAYOUT_RETURN, /* layoutreturn in progress */
+ NFS_LAYOUT_RETURN_REQUESTED, /* Return this layout ASAP */
NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */
NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */
- NFS_LAYOUT_RETRY_LAYOUTGET, /* Retry layoutget */
};
enum layoutdriver_policy_flags {
@@ -261,11 +260,14 @@ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
bool update_barrier);
int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
struct pnfs_layout_hdr *lo,
- struct pnfs_layout_range *range,
+ const struct pnfs_layout_range *range,
struct nfs4_state *open_state);
int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
- struct pnfs_layout_range *recall_range);
+ const struct pnfs_layout_range *recall_range);
+int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
+ struct list_head *tmp_list,
+ const struct pnfs_layout_range *recall_range);
bool pnfs_roc(struct inode *ino);
void pnfs_roc_release(struct inode *ino);
void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
@@ -379,26 +381,6 @@ nfs4_get_deviceid(struct nfs4_deviceid_node *d)
return d;
}
-static inline void pnfs_set_retry_layoutget(struct pnfs_layout_hdr *lo)
-{
- if (!test_and_set_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags))
- atomic_inc(&lo->plh_refcount);
-}
-
-static inline void pnfs_clear_retry_layoutget(struct pnfs_layout_hdr *lo)
-{
- if (test_and_clear_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags)) {
- atomic_dec(&lo->plh_refcount);
- /* wake up waiters for LAYOUTRETURN as that is not needed */
- wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
- }
-}
-
-static inline bool pnfs_should_retry_layoutget(struct pnfs_layout_hdr *lo)
-{
- return test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags);
-}
-
static inline struct pnfs_layout_segment *
pnfs_get_lseg(struct pnfs_layout_segment *lseg)
{
@@ -409,6 +391,12 @@ pnfs_get_lseg(struct pnfs_layout_segment *lseg)
return lseg;
}
+static inline bool
+pnfs_is_valid_lseg(struct pnfs_layout_segment *lseg)
+{
+ return test_bit(NFS_LSEG_VALID, &lseg->pls_flags) != 0;
+}
+
/* Return true if a layout driver is being used for this mountpoint */
static inline int pnfs_enabled_sb(struct nfs_server *nfss)
{
@@ -556,6 +544,26 @@ pnfs_calc_offset_length(u64 offset, u64 end)
return 1 + end - offset;
}
+/**
+ * pnfs_mark_layout_returned_if_empty - marks the layout as returned
+ * @lo: layout header
+ *
+ * Note: Caller must hold inode->i_lock
+ */
+static inline void
+pnfs_mark_layout_returned_if_empty(struct pnfs_layout_hdr *lo)
+{
+ if (list_empty(&lo->plh_segs))
+ set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+}
+
+static inline void
+pnfs_copy_range(struct pnfs_layout_range *dst,
+ const struct pnfs_layout_range *src)
+{
+ memcpy(dst, src, sizeof(*dst));
+}
+
extern unsigned int layoutstats_timer;
#ifdef NFS_DEBUG
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 24655b807..81ac6480f 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -266,17 +266,14 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
} else {
nfs_retry_commit(mds_pages, NULL, cinfo, 0);
pnfs_generic_retry_commit(cinfo, 0);
- cinfo->completion_ops->error_cleanup(NFS_I(inode));
return -ENOMEM;
}
}
nreq += pnfs_generic_alloc_ds_commits(cinfo, &list);
- if (nreq == 0) {
- cinfo->completion_ops->error_cleanup(NFS_I(inode));
+ if (nreq == 0)
goto out;
- }
atomic_add(nreq, &cinfo->mds->rpcs_out);
@@ -871,6 +868,11 @@ pnfs_layout_mark_request_commit(struct nfs_page *req,
buckets = cinfo->ds->buckets;
list = &buckets[ds_commit_idx].written;
if (list_empty(list)) {
+ if (!pnfs_is_valid_lseg(lseg)) {
+ spin_unlock(cinfo->lock);
+ cinfo->completion_ops->resched_write(cinfo, req);
+ return;
+ }
/* Non-empty buckets hold a reference on the lseg. That ref
* is normally transferred to the COMMIT call and released
* there. It could also be released if the last req is pulled
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 0a5e33f33..eb31e23e7 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -85,6 +85,23 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
+static void nfs_readpage_release(struct nfs_page *req)
+{
+ struct inode *inode = d_inode(req->wb_context->dentry);
+
+ dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
+ (unsigned long long)NFS_FILEID(inode), req->wb_bytes,
+ (long long)req_offset(req));
+
+ if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
+ if (PageUptodate(req->wb_page))
+ nfs_readpage_to_fscache(inode, req->wb_page, 0);
+
+ unlock_page(req->wb_page);
+ }
+ nfs_release_request(req);
+}
+
int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
struct page *page)
{
@@ -106,7 +123,10 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
nfs_pageio_init_read(&pgio, inode, false,
&nfs_async_read_completion_ops);
- nfs_pageio_add_request(&pgio, new);
+ if (!nfs_pageio_add_request(&pgio, new)) {
+ nfs_list_remove_request(new);
+ nfs_readpage_release(new);
+ }
nfs_pageio_complete(&pgio);
/* It doesn't make sense to do mirrored reads! */
@@ -115,24 +135,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
pgm = &pgio.pg_mirrors[0];
NFS_I(inode)->read_io += pgm->pg_bytes_written;
- return 0;
-}
-
-static void nfs_readpage_release(struct nfs_page *req)
-{
- struct inode *inode = d_inode(req->wb_context->dentry);
-
- dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
- (unsigned long long)NFS_FILEID(inode), req->wb_bytes,
- (long long)req_offset(req));
-
- if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
- if (PageUptodate(req->wb_page))
- nfs_readpage_to_fscache(inode, req->wb_page, 0);
-
- unlock_page(req->wb_page);
- }
- nfs_release_request(req);
+ return pgio.pg_error < 0 ? pgio.pg_error : 0;
}
static void nfs_page_group_set_uptodate(struct nfs_page *req)
@@ -361,6 +364,8 @@ readpage_async_filler(void *data, struct page *page)
if (len < PAGE_CACHE_SIZE)
zero_user_segment(page, len, PAGE_CACHE_SIZE);
if (!nfs_pageio_add_request(desc->pgio, new)) {
+ nfs_list_remove_request(new);
+ nfs_readpage_release(new);
error = desc->pgio->pg_error;
goto out_unlock;
}
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index b6de433da..4fe3eead3 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -42,21 +42,35 @@ error:
return -EIO;
}
-static const char *nfs_follow_link(struct dentry *dentry, void **cookie)
+static const char *nfs_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct inode *inode = d_inode(dentry);
struct page *page;
void *err;
- err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
- if (err)
- return err;
- page = read_cache_page(&inode->i_data, 0,
- (filler_t *)nfs_symlink_filler, inode);
- if (IS_ERR(page))
- return ERR_CAST(page);
- *cookie = page;
- return kmap(page);
+ if (!dentry) {
+ err = ERR_PTR(nfs_revalidate_mapping_rcu(inode));
+ if (err)
+ return err;
+ page = find_get_page(inode->i_mapping, 0);
+ if (!page)
+ return ERR_PTR(-ECHILD);
+ if (!PageUptodate(page)) {
+ put_page(page);
+ return ERR_PTR(-ECHILD);
+ }
+ } else {
+ err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
+ if (err)
+ return err;
+ page = read_cache_page(&inode->i_data, 0,
+ (filler_t *)nfs_symlink_filler, inode);
+ if (IS_ERR(page))
+ return ERR_CAST(page);
+ }
+ set_delayed_call(done, page_put_link, page);
+ return page_address(page);
}
/*
@@ -64,8 +78,7 @@ static const char *nfs_follow_link(struct dentry *dentry, void **cookie)
*/
const struct inode_operations nfs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = nfs_follow_link,
- .put_link = page_put_link,
+ .get_link = nfs_get_link,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
};
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7b9316406..5754835a2 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -21,6 +21,8 @@
#include <linux/nfs_page.h>
#include <linux/backing-dev.h>
#include <linux/export.h>
+#include <linux/freezer.h>
+#include <linux/wait.h>
#include <asm/uaccess.h>
@@ -244,11 +246,9 @@ static int wb_priority(struct writeback_control *wbc)
{
int ret = 0;
if (wbc->for_reclaim)
- return FLUSH_HIGHPRI | FLUSH_STABLE;
+ return FLUSH_HIGHPRI | FLUSH_COND_STABLE;
if (wbc->sync_mode == WB_SYNC_ALL)
ret = FLUSH_COND_STABLE;
- if (wbc->for_kupdate || wbc->for_background)
- ret |= FLUSH_LOWPRI;
return ret;
}
@@ -545,12 +545,22 @@ try_again:
return head;
}
+static void nfs_write_error_remove_page(struct nfs_page *req)
+{
+ nfs_unlock_request(req);
+ nfs_end_page_writeback(req);
+ nfs_release_request(req);
+ generic_error_remove_page(page_file_mapping(req->wb_page),
+ req->wb_page);
+}
+
/*
* Find an associated nfs write request, and prepare to flush it out
* May return an error if the user signalled nfs_wait_on_request().
*/
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
- struct page *page, bool nonblock)
+ struct page *page, bool nonblock,
+ bool launder)
{
struct nfs_page *req;
int ret = 0;
@@ -567,8 +577,21 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
ret = 0;
if (!nfs_pageio_add_request(pgio, req)) {
- nfs_redirty_request(req);
ret = pgio->pg_error;
+ /*
+ * Remove the problematic req upon fatal errors
+ * in launder case, while other dirty pages can
+ * still be around until they get flushed.
+ */
+ if (nfs_error_is_fatal(ret)) {
+ nfs_context_set_write_error(req->wb_context, ret);
+ if (launder) {
+ nfs_write_error_remove_page(req);
+ goto out;
+ }
+ }
+ nfs_redirty_request(req);
+ ret = -EAGAIN;
} else
nfs_add_stats(page_file_mapping(page)->host,
NFSIOS_WRITEPAGES, 1);
@@ -576,12 +599,14 @@ out:
return ret;
}
-static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
+static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
+ struct nfs_pageio_descriptor *pgio, bool launder)
{
int ret;
nfs_pageio_cond_complete(pgio, page_file_index(page));
- ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
+ ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE,
+ launder);
if (ret == -EAGAIN) {
redirty_page_for_writepage(wbc, page);
ret = 0;
@@ -592,7 +617,9 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st
/*
* Write an mmapped page to the server.
*/
-static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
+static int nfs_writepage_locked(struct page *page,
+ struct writeback_control *wbc,
+ bool launder)
{
struct nfs_pageio_descriptor pgio;
struct inode *inode = page_file_mapping(page)->host;
@@ -601,7 +628,7 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
false, &nfs_async_write_completion_ops);
- err = nfs_do_writepage(page, wbc, &pgio);
+ err = nfs_do_writepage(page, wbc, &pgio, launder);
nfs_pageio_complete(&pgio);
if (err < 0)
return err;
@@ -614,7 +641,7 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
int ret;
- ret = nfs_writepage_locked(page, wbc);
+ ret = nfs_writepage_locked(page, wbc, false);
unlock_page(page);
return ret;
}
@@ -623,7 +650,7 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
{
int ret;
- ret = nfs_do_writepage(page, wbc, data);
+ ret = nfs_do_writepage(page, wbc, data, false);
unlock_page(page);
return ret;
}
@@ -803,11 +830,10 @@ EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);
* holding the nfs_page lock.
*/
void
-nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
- struct nfs_commit_info *cinfo)
+nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo)
{
spin_lock(cinfo->lock);
- nfs_request_add_commit_list_locked(req, dst, cinfo);
+ nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);
spin_unlock(cinfo->lock);
nfs_mark_page_unstable(req->wb_page, cinfo);
}
@@ -865,7 +891,7 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
{
if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx))
return;
- nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
+ nfs_request_add_commit_list(req, cinfo);
}
static void
@@ -1128,7 +1154,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
if (req == NULL)
return 0;
l_ctx = req->wb_lock_context;
- do_flush = req->wb_page != page || req->wb_context != ctx;
+ do_flush = req->wb_page != page ||
+ !nfs_match_open_context(req->wb_context, ctx);
/* for now, flush if more than 1 request in page_group */
do_flush |= req->wb_this_page != req;
if (l_ctx && flctx &&
@@ -1326,9 +1353,15 @@ static void nfs_async_write_error(struct list_head *head)
}
}
+static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
+{
+ nfs_async_write_error(&hdr->pages);
+}
+
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
.error_cleanup = nfs_async_write_error,
.completion = nfs_write_completion,
+ .reschedule_io = nfs_async_write_reschedule_io,
};
void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
@@ -1529,27 +1562,21 @@ static void nfs_writeback_result(struct rpc_task *task,
}
}
-
-static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
+static int wait_on_commit(struct nfs_mds_commit_info *cinfo)
{
- int ret;
+ return wait_on_atomic_t(&cinfo->rpcs_out,
+ nfs_wait_atomic_killable, TASK_KILLABLE);
+}
- if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
- return 1;
- if (!may_wait)
- return 0;
- ret = out_of_line_wait_on_bit_lock(&nfsi->flags,
- NFS_INO_COMMIT,
- nfs_wait_bit_killable,
- TASK_KILLABLE);
- return (ret < 0) ? ret : 1;
+static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
+{
+ atomic_inc(&cinfo->rpcs_out);
}
-static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
+static void nfs_commit_end(struct nfs_mds_commit_info *cinfo)
{
- clear_bit(NFS_INO_COMMIT, &nfsi->flags);
- smp_mb__after_atomic();
- wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
+ if (atomic_dec_and_test(&cinfo->rpcs_out))
+ wake_up_atomic_t(&cinfo->rpcs_out);
}
void nfs_commitdata_release(struct nfs_commit_data *data)
@@ -1666,6 +1693,13 @@ void nfs_retry_commit(struct list_head *page_list,
}
EXPORT_SYMBOL_GPL(nfs_retry_commit);
+static void
+nfs_commit_resched_write(struct nfs_commit_info *cinfo,
+ struct nfs_page *req)
+{
+ __set_page_dirty_nobuffers(req->wb_page);
+}
+
/*
* Commit dirty pages
*/
@@ -1687,7 +1721,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
data->mds_ops, how, 0);
out_bad:
nfs_retry_commit(head, NULL, cinfo, 0);
- cinfo->completion_ops->error_cleanup(NFS_I(inode));
return -ENOMEM;
}
@@ -1749,8 +1782,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
nfs_init_cinfo(&cinfo, data->inode, data->dreq);
- if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
- nfs_commit_clear_lock(NFS_I(data->inode));
+ nfs_commit_end(cinfo.mds);
}
static void nfs_commit_release(void *calldata)
@@ -1769,7 +1801,7 @@ static const struct rpc_call_ops nfs_commit_ops = {
static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
.completion = nfs_commit_release_pages,
- .error_cleanup = nfs_commit_clear_lock,
+ .resched_write = nfs_commit_resched_write,
};
int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
@@ -1788,30 +1820,25 @@ int nfs_commit_inode(struct inode *inode, int how)
LIST_HEAD(head);
struct nfs_commit_info cinfo;
int may_wait = how & FLUSH_SYNC;
+ int error = 0;
int res;
- res = nfs_commit_set_lock(NFS_I(inode), may_wait);
- if (res <= 0)
- goto out_mark_dirty;
nfs_init_cinfo_from_inode(&cinfo, inode);
+ nfs_commit_begin(cinfo.mds);
res = nfs_scan_commit(inode, &head, &cinfo);
- if (res) {
- int error;
-
+ if (res)
error = nfs_generic_commit_list(inode, &head, how, &cinfo);
- if (error < 0)
- return error;
- if (!may_wait)
- goto out_mark_dirty;
- error = wait_on_bit_action(&NFS_I(inode)->flags,
- NFS_INO_COMMIT,
- nfs_wait_bit_killable,
- TASK_KILLABLE);
- if (error < 0)
- return error;
- } else
- nfs_commit_clear_lock(NFS_I(inode));
+ nfs_commit_end(cinfo.mds);
+ if (error < 0)
+ goto out_error;
+ if (!may_wait)
+ goto out_mark_dirty;
+ error = wait_on_commit(cinfo.mds);
+ if (error < 0)
+ return error;
return res;
+out_error:
+ res = error;
/* Note: If we exit without ensuring that the commit is complete,
* we must mark the inode as dirty. Otherwise, future calls to
* sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
@@ -1821,6 +1848,7 @@ out_mark_dirty:
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
return res;
}
+EXPORT_SYMBOL_GPL(nfs_commit_inode);
int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
@@ -1911,7 +1939,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
/*
* Write back all requests on one page - we do this before reading it.
*/
-int nfs_wb_page(struct inode *inode, struct page *page)
+int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder)
{
loff_t range_start = page_file_offset(page);
loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
@@ -1928,7 +1956,7 @@ int nfs_wb_page(struct inode *inode, struct page *page)
for (;;) {
wait_on_page_writeback(page);
if (clear_page_dirty_for_io(page)) {
- ret = nfs_writepage_locked(page, &wbc);
+ ret = nfs_writepage_locked(page, &wbc, launder);
if (ret < 0)
goto out_error;
continue;
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 77e7a5cca..1a03bc305 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -58,7 +58,7 @@ nlm_fclose(struct file *filp)
fput(filp);
}
-static struct nlmsvc_binding nfsd_nlm_ops = {
+static const struct nlmsvc_binding nfsd_nlm_ops = {
.fopen = nlm_fopen, /* open file for locking */
.fclose = nlm_fclose, /* close file */
};
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index d8b16c256..5fbf3bbd0 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -92,7 +92,7 @@ struct nfsd_net {
struct file *rec_file;
bool in_grace;
- struct nfsd4_client_tracking_ops *client_tracking_ops;
+ const struct nfsd4_client_tracking_ops *client_tracking_ops;
time_t nfsd4_lease;
time_t nfsd4_grace;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 00575d776..2246454de 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -823,7 +823,7 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
} else
dchild = dget(dparent);
} else
- dchild = lookup_one_len(name, dparent, namlen);
+ dchild = lookup_one_len_unlocked(name, dparent, namlen);
if (IS_ERR(dchild))
return rv;
if (d_mountpoint(dchild))
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index e7f50c408..7389cb1d7 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -792,12 +792,16 @@ static void warn_no_callback_path(struct nfs4_client *clp, int reason)
static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason)
{
+ if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
+ return;
clp->cl_cb_state = NFSD4_CB_DOWN;
warn_no_callback_path(clp, reason);
}
static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason)
{
+ if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
+ return;
clp->cl_cb_state = NFSD4_CB_FAULT;
warn_no_callback_path(clp, reason);
}
@@ -1143,7 +1147,7 @@ nfsd4_run_cb_work(struct work_struct *work)
}
void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
- struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op)
+ const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op)
{
cb->cb_clp = clp;
cb->cb_msg.rpc_proc = &nfs4_cb_procedures[op];
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index c9d6c715c..ce2d010d3 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -22,7 +22,7 @@ struct nfs4_layout {
static struct kmem_cache *nfs4_layout_cache;
static struct kmem_cache *nfs4_layout_stateid_cache;
-static struct nfsd4_callback_ops nfsd4_cb_layout_ops;
+static const struct nfsd4_callback_ops nfsd4_cb_layout_ops;
static const struct lock_manager_operations nfsd4_layouts_lm_ops;
const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = {
@@ -624,24 +624,39 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
struct nfs4_layout_stateid *ls =
container_of(cb, struct nfs4_layout_stateid, ls_recall);
+ struct nfsd_net *nn;
+ ktime_t now, cutoff;
LIST_HEAD(reaplist);
+
switch (task->tk_status) {
case 0:
- return 1;
+ case -NFS4ERR_DELAY:
+ /*
+ * Anything left? If not, then call it done. Note that we don't
+ * take the spinlock since this is an optimization and nothing
+ * should get added until the cb counter goes to zero.
+ */
+ if (list_empty(&ls->ls_layouts))
+ return 1;
+
+ /* Poll the client until it's done with the layout */
+ now = ktime_get();
+ nn = net_generic(ls->ls_stid.sc_client->net, nfsd_net_id);
+
+ /* Client gets 2 lease periods to return it */
+ cutoff = ktime_add_ns(task->tk_start,
+ nn->nfsd4_lease * NSEC_PER_SEC * 2);
+
+ if (ktime_before(now, cutoff)) {
+ rpc_delay(task, HZ/100); /* 10 mili-seconds */
+ return 0;
+ }
+ /* Fallthrough */
case -NFS4ERR_NOMATCHING_LAYOUT:
trace_layout_recall_done(&ls->ls_stid.sc_stateid);
task->tk_status = 0;
return 1;
- case -NFS4ERR_DELAY:
- /* Poll the client until it's done with the layout */
- /* FIXME: cap number of retries.
- * The pnfs standard states that we need to only expire
- * the client after at-least "lease time" .eg lease-time * 2
- * when failing to communicate a recall
- */
- rpc_delay(task, HZ/100); /* 10 mili-seconds */
- return 0;
default:
/*
* Unknown error or non-responding client, we'll need to fence.
@@ -665,7 +680,7 @@ nfsd4_cb_layout_release(struct nfsd4_callback *cb)
nfs4_put_stid(&ls->ls_stid);
}
-static struct nfsd4_callback_ops nfsd4_cb_layout_ops = {
+static const struct nfsd4_callback_ops nfsd4_cb_layout_ops = {
.prepare = nfsd4_cb_layout_prepare,
.done = nfsd4_cb_layout_done,
.release = nfsd4_cb_layout_release,
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a9f096c7e..4cba7865f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -55,10 +55,10 @@ nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u
struct inode *inode = d_inode(resfh->fh_dentry);
int status;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
status = security_inode_setsecctx(resfh->fh_dentry,
label->data, label->len);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (status)
/*
@@ -774,8 +774,9 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* check stateid */
- status = nfs4_preprocess_stateid_op(rqstp, cstate, &read->rd_stateid,
- RD_STATE, &read->rd_filp, &read->rd_tmp_file);
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ &read->rd_stateid, RD_STATE,
+ &read->rd_filp, &read->rd_tmp_file);
if (status) {
dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
goto out;
@@ -921,7 +922,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
status = nfs4_preprocess_stateid_op(rqstp, cstate,
- &setattr->sa_stateid, WR_STATE, NULL, NULL);
+ &cstate->current_fh, &setattr->sa_stateid,
+ WR_STATE, NULL, NULL);
if (status) {
dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
return status;
@@ -985,8 +987,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (write->wr_offset >= OFFSET_MAX)
return nfserr_inval;
- status = nfs4_preprocess_stateid_op(rqstp, cstate, stateid, WR_STATE,
- &filp, NULL);
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ stateid, WR_STATE, &filp, NULL);
if (status) {
dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
return status;
@@ -1010,13 +1012,54 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
}
+/*
+ * CLONE operation handler.
+ *
+ * Resolves the source file from the saved filehandle (read state) and the
+ * destination file from the current filehandle (write state), checks that
+ * both are regular files, and passes the request on to
+ * nfsd4_clone_file_range().  Returns the first failing status, or whatever
+ * nfsd4_clone_file_range() returns.  Both file references are dropped
+ * before returning.
+ */
static __be32
+nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_clone *clone)
+{
+ struct file *src, *dst;
+ __be32 status;
+
+ /* source: saved filehandle, checked with RD_STATE */
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
+ &clone->cl_src_stateid, RD_STATE,
+ &src, NULL);
+ if (status) {
+ dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
+ goto out;
+ }
+
+ /* destination: current filehandle, checked with WR_STATE */
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ &clone->cl_dst_stateid, WR_STATE,
+ &dst, NULL);
+ if (status) {
+ dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
+ goto out_put_src;
+ }
+
+ /* fix up for NFS-specific error code */
+ if (!S_ISREG(file_inode(src)->i_mode) ||
+ !S_ISREG(file_inode(dst)->i_mode)) {
+ status = nfserr_wrong_type;
+ goto out_put_dst;
+ }
+
+ status = nfsd4_clone_file_range(src, clone->cl_src_pos,
+ dst, clone->cl_dst_pos, clone->cl_count);
+
+ /* unwind in reverse order of acquisition */
+out_put_dst:
+ fput(dst);
+out_put_src:
+ fput(src);
+out:
+ return status;
+}
+
+static __be32
nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_fallocate *fallocate, int flags)
{
__be32 status = nfserr_notsupp;
struct file *file;
- status = nfs4_preprocess_stateid_op(rqstp, cstate,
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&fallocate->falloc_stateid,
WR_STATE, &file, NULL);
if (status != nfs_ok) {
@@ -1055,7 +1098,7 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status;
struct file *file;
- status = nfs4_preprocess_stateid_op(rqstp, cstate,
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&seek->seek_stateid,
RD_STATE, &file, NULL);
if (status) {
@@ -2279,6 +2322,12 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_name = "OP_DEALLOCATE",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
+ [OP_CLONE] = {
+ .op_func = (nfsd4op_func)nfsd4_clone,
+ .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_name = "OP_CLONE",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+ },
[OP_SEEK] = {
.op_func = (nfsd4op_func)nfsd4_seek,
.op_name = "OP_SEEK",
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index e3d47091b..dc8ebecf5 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -192,7 +192,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
dir = nn->rec_file->f_path.dentry;
/* lock the parent */
- mutex_lock(&d_inode(dir)->i_mutex);
+ inode_lock(d_inode(dir));
dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1);
if (IS_ERR(dentry)) {
@@ -213,7 +213,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
out_put:
dput(dentry);
out_unlock:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
if (status == 0) {
if (nn->in_grace) {
crp = nfs4_client_to_reclaim(dname, nn);
@@ -286,7 +286,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
}
status = iterate_dir(nn->rec_file, &ctx.ctx);
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
if (!status) {
@@ -302,7 +302,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
list_del(&entry->list);
kfree(entry);
}
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
nfs4_reset_creds(original_cred);
list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
@@ -322,7 +322,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
dir = nn->rec_file->f_path.dentry;
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
dentry = lookup_one_len(name, dir, namlen);
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
@@ -335,7 +335,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
out:
dput(dentry);
out_unlock:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
return status;
}
@@ -631,7 +631,7 @@ nfsd4_check_legacy_client(struct nfs4_client *clp)
return -ENOENT;
}
-static struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
+static const struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
.init = nfsd4_legacy_tracking_init,
.exit = nfsd4_legacy_tracking_exit,
.create = nfsd4_create_clid_dir,
@@ -1050,7 +1050,7 @@ out_err:
printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret);
}
-static struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
.init = nfsd4_init_cld_pipe,
.exit = nfsd4_remove_cld_pipe,
.create = nfsd4_cld_create,
@@ -1394,7 +1394,7 @@ nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn)
kfree(legacy);
}
-static struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = {
+static const struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = {
.init = nfsd4_umh_cltrack_init,
.exit = NULL,
.create = nfsd4_umh_cltrack_create,
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6b800b5b8..c484a2b6c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -98,7 +98,7 @@ static struct kmem_cache *odstate_slab;
static void free_session(struct nfsd4_session *);
-static struct nfsd4_callback_ops nfsd4_cb_recall_ops;
+static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
static bool is_session_dead(struct nfsd4_session *ses)
{
@@ -1857,15 +1857,28 @@ static void copy_clid(struct nfs4_client *target, struct nfs4_client *source)
target->cl_clientid.cl_id = source->cl_clientid.cl_id;
}
-static int copy_cred(struct svc_cred *target, struct svc_cred *source)
+int strdup_if_nonnull(char **target, char *source)
{
- if (source->cr_principal) {
- target->cr_principal =
- kstrdup(source->cr_principal, GFP_KERNEL);
- if (target->cr_principal == NULL)
+ if (source) {
+ *target = kstrdup(source, GFP_KERNEL);
+ if (!*target)
return -ENOMEM;
} else
- target->cr_principal = NULL;
+ *target = NULL;
+ return 0;
+}
+
+static int copy_cred(struct svc_cred *target, struct svc_cred *source)
+{
+ int ret;
+
+ ret = strdup_if_nonnull(&target->cr_principal, source->cr_principal);
+ if (ret)
+ return ret;
+ ret = strdup_if_nonnull(&target->cr_raw_principal,
+ source->cr_raw_principal);
+ if (ret)
+ return ret;
target->cr_flavor = source->cr_flavor;
target->cr_uid = source->cr_uid;
target->cr_gid = source->cr_gid;
@@ -1969,6 +1982,9 @@ static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp)
return false;
if (!svc_rqst_integrity_protected(rqstp))
return false;
+ if (cl->cl_cred.cr_raw_principal)
+ return 0 == strcmp(cl->cl_cred.cr_raw_principal,
+ cr->cr_raw_principal);
if (!cr->cr_principal)
return false;
return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal);
@@ -2240,7 +2256,8 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
base = resp->cstate.data_offset;
slot->sl_datalen = buf->len - base;
if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen))
- WARN("%s: sessions DRC could not cache compound\n", __func__);
+ WARN(1, "%s: sessions DRC could not cache compound\n",
+ __func__);
return;
}
@@ -2365,10 +2382,27 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
if (exid->flags & ~EXCHGID4_FLAG_MASK_A)
return nfserr_inval;
+ new = create_client(exid->clname, rqstp, &verf);
+ if (new == NULL)
+ return nfserr_jukebox;
+
switch (exid->spa_how) {
case SP4_MACH_CRED:
- if (!svc_rqst_integrity_protected(rqstp))
- return nfserr_inval;
+ if (!svc_rqst_integrity_protected(rqstp)) {
+ status = nfserr_inval;
+ goto out_nolock;
+ }
+ /*
+ * Sometimes userspace doesn't give us a principal.
+ * Which is a bug, really. Anyway, we can't enforce
+ * MACH_CRED in that case, better to give up now:
+ */
+ if (!new->cl_cred.cr_principal &&
+ !new->cl_cred.cr_raw_principal) {
+ status = nfserr_serverfault;
+ goto out_nolock;
+ }
+ new->cl_mach_cred = true;
case SP4_NONE:
break;
default: /* checked by xdr code */
@@ -2377,10 +2411,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
return nfserr_encr_alg_unsupp;
}
- new = create_client(exid->clname, rqstp, &verf);
- if (new == NULL)
- return nfserr_jukebox;
-
/* Cases below refer to rfc 5661 section 18.35.4: */
spin_lock(&nn->client_lock);
conf = find_confirmed_client_by_name(&exid->clname, nn);
@@ -2442,7 +2472,6 @@ out_new:
goto out;
}
new->cl_minorversion = cstate->minorversion;
- new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED);
gen_clid(new, nn);
add_to_unconfirmed(new);
@@ -2460,6 +2489,7 @@ out_copy:
out:
spin_unlock(&nn->client_lock);
+out_nolock:
if (new)
expire_client(new);
if (unconf)
@@ -3648,7 +3678,7 @@ static void nfsd4_cb_recall_release(struct nfsd4_callback *cb)
nfs4_put_stid(&dp->dl_stid);
}
-static struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
+static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
.prepare = nfsd4_cb_recall_prepare,
.done = nfsd4_cb_recall_done,
.release = nfsd4_cb_recall_release,
@@ -4541,8 +4571,7 @@ static void
laundromat_main(struct work_struct *laundry)
{
time_t t;
- struct delayed_work *dwork = container_of(laundry, struct delayed_work,
- work);
+ struct delayed_work *dwork = to_delayed_work(laundry);
struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
laundromat_work);
@@ -4797,10 +4826,9 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
*/
__be32
nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
- struct nfsd4_compound_state *cstate, stateid_t *stateid,
- int flags, struct file **filpp, bool *tmp_file)
+ struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
+ stateid_t *stateid, int flags, struct file **filpp, bool *tmp_file)
{
- struct svc_fh *fhp = &cstate->current_fh;
struct inode *ino = d_inode(fhp->fh_dentry);
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 51c9e9ca3..d6ef0955a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1675,6 +1675,25 @@ nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
}
+/*
+ * Decode the CLONE arguments into @clone: the source stateid, the
+ * destination stateid, and then three 64-bit values in wire order
+ * (source position, destination position, count).  A stateid decode
+ * failure is returned immediately.
+ */
static __be32
+nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone)
+{
+ DECODE_HEAD;
+
+ status = nfsd4_decode_stateid(argp, &clone->cl_src_stateid);
+ if (status)
+ return status;
+ status = nfsd4_decode_stateid(argp, &clone->cl_dst_stateid);
+ if (status)
+ return status;
+
+ /* three hypers follow: 3 * 8 bytes */
+ READ_BUF(8 + 8 + 8);
+ p = xdr_decode_hyper(p, &clone->cl_src_pos);
+ p = xdr_decode_hyper(p, &clone->cl_dst_pos);
+ p = xdr_decode_hyper(p, &clone->cl_count);
+ DECODE_TAIL;
+}
+
+static __be32
nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
{
DECODE_HEAD;
@@ -1785,6 +1804,7 @@ static nfsd4_dec nfsd4_dec_ops[] = {
[OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek,
[OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone,
};
static inline bool
@@ -2838,14 +2858,14 @@ nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
__be32 nfserr;
int ignore_crossmnt = 0;
- dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
+ dentry = lookup_one_len_unlocked(name, cd->rd_fhp->fh_dentry, namlen);
if (IS_ERR(dentry))
return nfserrno(PTR_ERR(dentry));
if (d_really_is_negative(dentry)) {
/*
- * nfsd_buffered_readdir drops the i_mutex between
- * readdir and calling this callback, leaving a window
- * where this directory entry could have gone away.
+ * we're not holding the i_mutex here, so there's
+ * a window where this directory entry could have gone
+ * away.
*/
dput(dentry);
return nfserr_noent;
@@ -4292,6 +4312,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
[OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop,
[OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek,
[OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop,
};
/*
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 2087bae17..f84fe6bf9 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -7,6 +7,7 @@
#ifndef _LINUX_NFSD_NFSFH_H
#define _LINUX_NFSD_NFSFH_H
+#include <linux/crc32.h>
#include <linux/sunrpc/svc.h>
#include <uapi/linux/nfsd/nfsfh.h>
@@ -205,6 +206,28 @@ static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
return true;
}
+#ifdef CONFIG_CRC32
+/**
+ * knfsd_fh_hash - calculate the crc32 hash for the filehandle
+ * @fh: pointer to filehandle
+ *
+ * The CRC is seeded with 0xFFFFFFFF, run over the fh_size bytes starting
+ * at fh_base, and the result is bit-inverted.
+ *
+ * Return: a crc32 hash for the filehandle that is compatible with
+ * the one displayed by "wireshark".
+ */
+
+static inline u32
+knfsd_fh_hash(struct knfsd_fh *fh)
+{
+ return ~crc32_le(0xFFFFFFFF, (unsigned char *)&fh->fh_base, fh->fh_size);
+}
+#else
+/* Without CONFIG_CRC32 no hash is computed; every filehandle hashes to 0. */
+static inline u32
+knfsd_fh_hash(struct knfsd_fh *fh)
+{
+ return 0;
+}
+#endif
+
#ifdef CONFIG_NFSD_V3
/*
* The wcc data stored in current_fh should be cleared
@@ -265,7 +288,7 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
}
inode = d_inode(dentry);
- mutex_lock_nested(&inode->i_mutex, subclass);
+ inode_lock_nested(inode, subclass);
fill_pre_wcc(fhp);
fhp->fh_locked = true;
}
@@ -284,7 +307,7 @@ fh_unlock(struct svc_fh *fhp)
{
if (fhp->fh_locked) {
fill_post_wcc(fhp);
- mutex_unlock(&d_inode(fhp->fh_dentry)->i_mutex);
+ inode_unlock(d_inode(fhp->fh_dentry));
fhp->fh_locked = false;
}
}
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index ad4e2377d..45007acaf 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -14,9 +14,13 @@
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/svc_xprt.h>
#include <linux/lockd/bind.h>
#include <linux/nfsacl.h>
#include <linux/seq_file.h>
+#include <linux/inetdevice.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
#include <net/net_namespace.h>
#include "nfsd.h"
#include "cache.h"
@@ -306,22 +310,81 @@ static void nfsd_shutdown_net(struct net *net)
nfsd_shutdown_generic();
}
+/*
+ * inetaddr notifier callback.
+ *
+ * Only NETDEV_DOWN is acted on: if the namespace owning the device has an
+ * nfsd service, the removed IPv4 address is passed to
+ * svc_age_temp_xprts_now().  Every other event is ignored.  Always
+ * returns NOTIFY_DONE.
+ */
+static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
+ struct net_device *dev = ifa->ifa_dev->dev;
+ struct net *net = dev_net(dev);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct sockaddr_in sin;
+
+ if (event != NETDEV_DOWN)
+ goto out;
+
+ if (nn->nfsd_serv) {
+ dprintk("nfsd_inetaddr_event: removed %pI4\n", &ifa->ifa_local);
+ /* only family and address are filled in; sin_port is left unset */
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = ifa->ifa_local;
+ svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin);
+ }
+
+out:
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nfsd_inetaddr_notifier = {
+ .notifier_call = nfsd_inetaddr_event,
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+/*
+ * inet6addr notifier callback.
+ *
+ * Only NETDEV_DOWN is acted on: if the namespace owning the device has an
+ * nfsd service, the removed IPv6 address is passed to
+ * svc_age_temp_xprts_now().  Every other event is ignored.  Always
+ * returns NOTIFY_DONE.
+ */
+static int nfsd_inet6addr_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
+ struct net_device *dev = ifa->idev->dev;
+ struct net *net = dev_net(dev);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct sockaddr_in6 sin6;
+
+ if (event != NETDEV_DOWN)
+ goto out;
+
+ if (nn->nfsd_serv) {
+ dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr);
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_addr = ifa->addr;
+ /*
+ * A link-local address only identifies an endpoint together
+ * with its interface.  Fill in the scope id so that it is not
+ * left as uninitialised stack data if the address comparison
+ * looks at it.
+ */
+ if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ sin6.sin6_scope_id = dev->ifindex;
+ svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6);
+ }
+
+out:
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nfsd_inet6addr_notifier = {
+ .notifier_call = nfsd_inet6addr_event,
+};
+#endif
+
static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ unregister_inetaddr_notifier(&nfsd_inetaddr_notifier);
+#if IS_ENABLED(CONFIG_IPV6)
+ unregister_inet6addr_notifier(&nfsd_inet6addr_notifier);
+#endif
/*
* write_ports can create the server without actually starting
* any threads--if we get shut down before any threads are
* started, then nfsd_last_thread will be run before any of this
- * other initialization has been done.
+ * other initialization has been done except the rpcb information.
*/
+ svc_rpcb_cleanup(serv, net);
if (!nn->nfsd_net_up)
return;
- nfsd_shutdown_net(net);
-
- svc_rpcb_cleanup(serv, net);
+ nfsd_shutdown_net(net);
printk(KERN_WARNING "nfsd: last server has exited, flushing export "
"cache\n");
nfsd_export_flush(net);
@@ -425,6 +488,10 @@ int nfsd_create_serv(struct net *net)
}
set_max_drc();
+ register_inetaddr_notifier(&nfsd_inetaddr_notifier);
+#if IS_ENABLED(CONFIG_IPV6)
+ register_inet6addr_notifier(&nfsd_inet6addr_notifier);
+#endif
do_gettimeofday(&nn->nfssvc_boot); /* record boot time */
return 0;
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 77fdf4de9..c050c5303 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -65,7 +65,7 @@ struct nfsd4_callback {
struct nfs4_client *cb_clp;
u32 cb_minorversion;
struct rpc_message cb_msg;
- struct nfsd4_callback_ops *cb_ops;
+ const struct nfsd4_callback_ops *cb_ops;
struct work_struct cb_work;
int cb_seq_status;
int cb_status;
@@ -578,8 +578,8 @@ struct nfsd4_compound_state;
struct nfsd_net;
extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
- struct nfsd4_compound_state *cstate, stateid_t *stateid,
- int flags, struct file **filp, bool *tmp_file);
+ struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
+ stateid_t *stateid, int flags, struct file **filp, bool *tmp_file);
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
stateid_t *stateid, unsigned char typemask,
struct nfs4_stid **s, struct nfsd_net *nn);
@@ -599,7 +599,7 @@ extern void nfsd4_probe_callback(struct nfs4_client *clp);
extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
- struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op);
+ const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op);
extern void nfsd4_run_cb(struct nfsd4_callback *cb);
extern int nfsd4_create_callback_queue(void);
extern void nfsd4_destroy_callback_queue(void);
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 0befe7627..328704190 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -8,6 +8,47 @@
#define _NFSD_TRACE_H
#include <linux/tracepoint.h>
+#include "nfsfh.h"
+
+/*
+ * Event class for the nfsd I/O tracepoints.  Each event records the RPC
+ * xid, a copy of the file handle (taken with fh_copy_shallow()), and the
+ * offset and len passed in by the caller.  The file handle is printed as
+ * its knfsd_fh_hash() value rather than in full.
+ */
+DECLARE_EVENT_CLASS(nfsd_io_class,
+ TP_PROTO(struct svc_rqst *rqstp,
+ struct svc_fh *fhp,
+ loff_t offset,
+ int len),
+ TP_ARGS(rqstp, fhp, offset, len),
+ TP_STRUCT__entry(
+ __field(__be32, xid)
+ __field_struct(struct knfsd_fh, fh)
+ __field(loff_t, offset)
+ __field(int, len)
+ ),
+ TP_fast_assign(
+ __entry->xid = rqstp->rq_xid;
+ fh_copy_shallow(&__entry->fh, &fhp->fh_handle);
+ __entry->offset = offset;
+ __entry->len = len;
+ ),
+ TP_printk("xid=0x%x fh=0x%x offset=%lld len=%d",
+ __be32_to_cpu(__entry->xid), knfsd_fh_hash(&__entry->fh),
+ __entry->offset, __entry->len)
+)
+
+#define DEFINE_NFSD_IO_EVENT(name) \
+DEFINE_EVENT(nfsd_io_class, name, \
+ TP_PROTO(struct svc_rqst *rqstp, \
+ struct svc_fh *fhp, \
+ loff_t offset, \
+ int len), \
+ TP_ARGS(rqstp, fhp, offset, len))
+
+DEFINE_NFSD_IO_EVENT(read_start);
+DEFINE_NFSD_IO_EVENT(read_opened);
+DEFINE_NFSD_IO_EVENT(read_io_done);
+DEFINE_NFSD_IO_EVENT(read_done);
+DEFINE_NFSD_IO_EVENT(write_start);
+DEFINE_NFSD_IO_EVENT(write_opened);
+DEFINE_NFSD_IO_EVENT(write_io_done);
+DEFINE_NFSD_IO_EVENT(write_done);
#include "state.h"
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 994d66fbb..5d2a57e4c 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -36,12 +36,14 @@
#endif /* CONFIG_NFSD_V3 */
#ifdef CONFIG_NFSD_V4
+#include "../internal.h"
#include "acl.h"
#include "idmap.h"
#endif /* CONFIG_NFSD_V4 */
#include "nfsd.h"
#include "vfs.h"
+#include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_FILEOP
@@ -217,10 +219,16 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
host_err = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_nfserr;
- /*
- * check if we have crossed a mount point ...
- */
if (nfsd_mountpoint(dentry, exp)) {
+ /*
+ * We don't need the i_mutex after all. It's
+ * still possible we could open this (regular
+ * files can be mountpoints too), but the
+ * i_mutex is just there to prevent renames of
+ * something that we might be about to delegate,
+ * and a mountpoint won't be renamed:
+ */
+ fh_unlock(fhp);
if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
dput(dentry);
goto out_nfserr;
@@ -485,9 +493,9 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
dentry = fhp->fh_dentry;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
host_error = security_inode_setsecctx(dentry, label->data, label->len);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
return nfserrno(host_error);
}
#else
@@ -498,6 +506,13 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
#endif
+/*
+ * Thin wrapper around vfs_clone_file_range(): forwards the arguments
+ * unchanged and converts the result to an NFS status with nfserrno().
+ */
+__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
+ u64 dst_pos, u64 count)
+{
+ return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos,
+ count));
+}
+
__be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset, loff_t len,
int flags)
@@ -983,16 +998,23 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct raparms *ra;
__be32 err;
+ trace_read_start(rqstp, fhp, offset, vlen);
err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
if (err)
return err;
ra = nfsd_init_raparms(file);
+
+ trace_read_opened(rqstp, fhp, offset, vlen);
err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count);
+ trace_read_io_done(rqstp, fhp, offset, vlen);
+
if (ra)
nfsd_put_raparams(file, ra);
fput(file);
+ trace_read_done(rqstp, fhp, offset, vlen);
+
return err;
}
@@ -1008,24 +1030,31 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
{
__be32 err = 0;
+ trace_write_start(rqstp, fhp, offset, vlen);
+
if (file) {
err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE);
if (err)
goto out;
+ trace_write_opened(rqstp, fhp, offset, vlen);
err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
stablep);
+ trace_write_io_done(rqstp, fhp, offset, vlen);
} else {
err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
if (err)
goto out;
+ trace_write_opened(rqstp, fhp, offset, vlen);
if (cnt)
err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
cnt, stablep);
+ trace_write_io_done(rqstp, fhp, offset, vlen);
fput(file);
}
out:
+ trace_write_done(rqstp, fhp, offset, vlen);
return err;
}
@@ -1809,7 +1838,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
offset = *offsetp;
while (1) {
- struct inode *dir_inode = file_inode(file);
unsigned int reclen;
cdp->err = nfserr_eof; /* will be cleared on successful read */
@@ -1828,15 +1856,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
if (!size)
break;
- /*
- * Various filldir functions may end up calling back into
- * lookup_one_len() and the file system's ->lookup() method.
- * These expect i_mutex to be held, as it would within readdir.
- */
- host_err = mutex_lock_killable(&dir_inode->i_mutex);
- if (host_err)
- break;
-
de = (struct buffered_dirent *)buf.dirent;
while (size > 0) {
offset = de->offset;
@@ -1853,7 +1872,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
size -= reclen;
de = (struct buffered_dirent *)((char *)de + reclen);
}
- mutex_unlock(&dir_inode->i_mutex);
if (size > 0) /* We bailed out early */
break;
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index fcfc48cbe..c11ba316f 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -56,6 +56,8 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,
struct xdr_netobj *);
__be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
struct file *, loff_t, loff_t, int);
+__be32 nfsd4_clone_file_range(struct file *, u64, struct file *,
+ u64, u64);
#endif /* CONFIG_NFSD_V4 */
__be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index ce7362c88..d9554813e 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -491,6 +491,15 @@ struct nfsd4_fallocate {
u64 falloc_length;
};
+struct nfsd4_clone {
+ /* request */
+ stateid_t cl_src_stateid;
+ stateid_t cl_dst_stateid;
+ u64 cl_src_pos;
+ u64 cl_dst_pos;
+ u64 cl_count;
+};
+
struct nfsd4_seek {
/* request */
stateid_t seek_stateid;
@@ -555,6 +564,7 @@ struct nfsd4_op {
/* NFSv4.2 */
struct nfsd4_fallocate allocate;
struct nfsd4_fallocate deallocate;
+ struct nfsd4_clone clone;
struct nfsd4_seek seek;
} u;
struct nfs4_replay * replay;
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index ac2f64943..21a1e2e0d 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -510,6 +510,7 @@ static int __nilfs_read_inode(struct super_block *sb,
inode->i_mapping->a_ops = &nilfs_aops;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &nilfs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &nilfs_aops;
} else {
inode->i_op = &nilfs_special_inode_operations;
@@ -1002,7 +1003,7 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
isize = i_size_read(inode);
@@ -1112,6 +1113,6 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (ret == 1)
ret = 0;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index aba43811d..e8fe24882 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -158,7 +158,7 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
flags = nilfs_mask_flags(inode->i_mode, flags);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
oldflags = NILFS_I(inode)->i_flags;
@@ -186,7 +186,7 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
nilfs_mark_inode_dirty(inode);
ret = nilfs_transaction_commit(inode->i_sb);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
}
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index c9a1a491a..7ccdb961e 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -161,6 +161,7 @@ static int nilfs_symlink(struct inode *dir, struct dentry *dentry,
/* slow symlink */
inode->i_op = &nilfs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &nilfs_aops;
err = page_symlink(inode, symname, l);
if (err)
@@ -568,8 +569,7 @@ const struct inode_operations nilfs_special_inode_operations = {
const struct inode_operations nilfs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.permission = nilfs_permission,
};
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 354013ea2..7f5d3d9f1 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1316,13 +1316,11 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
}
if (!s->s_root) {
- char b[BDEVNAME_SIZE];
-
- s_new = true;
+ s_new = true;
/* New superblock instance created */
s->s_mode = mode;
- strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id));
+ snprintf(s->s_id, sizeof(s->s_id), "%pg", sd.bdev);
sb_set_blocksize(s, block_size(sd.bdev));
err = nilfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
@@ -1418,7 +1416,8 @@ static int __init nilfs_init_cachep(void)
{
nilfs_inode_cachep = kmem_cache_create("nilfs2_inode_cache",
sizeof(struct nilfs_inode_info), 0,
- SLAB_RECLAIM_ACCOUNT, nilfs_inode_init_once);
+ SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT,
+ nilfs_inode_init_once);
if (!nilfs_inode_cachep)
goto fail;
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 53e45b61d..d16b62cb2 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -22,7 +22,6 @@
#include <linux/srcu.h>
#include <linux/rculist.h>
#include <linux/wait.h>
-#include <linux/module.h>
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
@@ -73,7 +72,6 @@ void fsnotify_get_group(struct fsnotify_group *group)
{
atomic_inc(&group->refcnt);
}
-EXPORT_SYMBOL_GPL(fsnotify_get_group);
/*
* Drop a reference to a group. Free it if it's through.
@@ -83,7 +81,6 @@ void fsnotify_put_group(struct fsnotify_group *group)
if (atomic_dec_and_test(&group->refcnt))
fsnotify_final_destroy_group(group);
}
-EXPORT_SYMBOL_GPL(fsnotify_put_group);
/*
* Create a new fsnotify_group and hold a reference for the group returned.
@@ -112,7 +109,6 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
return group;
}
-EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
int fsnotify_fasync(int fd, struct file *file, int on)
{
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index e785fd954..741077dee 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -199,8 +199,7 @@ void fsnotify_unmount_inodes(struct super_block *sb)
break;
}
spin_unlock(&next_i->i_lock);
- next_i = list_entry(next_i->i_sb_list.next,
- struct inode, i_sb_list);
+ next_i = list_next_entry(next_i, i_sb_list);
}
/*
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 8175f3cd4..7115c5d7d 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -91,10 +91,14 @@
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
+#define FSNOTIFY_REAPER_DELAY (1) /* 1 jiffy */
+
struct srcu_struct fsnotify_mark_srcu;
static DEFINE_SPINLOCK(destroy_lock);
static LIST_HEAD(destroy_list);
-static DECLARE_WAIT_QUEUE_HEAD(destroy_waitq);
+
+static void fsnotify_mark_destroy(struct work_struct *work);
+static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy);
void fsnotify_get_mark(struct fsnotify_mark *mark)
{
@@ -109,7 +113,6 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
mark->free_mark(mark);
}
}
-EXPORT_SYMBOL_GPL(fsnotify_put_mark);
/* Calculate mask of events for a list of marks */
u32 fsnotify_recalc_mask(struct hlist_head *head)
@@ -190,7 +193,8 @@ void fsnotify_free_mark(struct fsnotify_mark *mark)
spin_lock(&destroy_lock);
list_add(&mark->g_list, &destroy_list);
spin_unlock(&destroy_lock);
- wake_up(&destroy_waitq);
+ queue_delayed_work(system_unbound_wq, &reaper_work,
+ FSNOTIFY_REAPER_DELAY);
/*
* Some groups like to know that marks are being freed. This is a
@@ -209,7 +213,6 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
mutex_unlock(&group->mark_mutex);
fsnotify_free_mark(mark);
}
-EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);
void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock)
{
@@ -390,11 +393,11 @@ err:
spin_lock(&destroy_lock);
list_add(&mark->g_list, &destroy_list);
spin_unlock(&destroy_lock);
- wake_up(&destroy_waitq);
+ queue_delayed_work(system_unbound_wq, &reaper_work,
+ FSNOTIFY_REAPER_DELAY);
return ret;
}
-EXPORT_SYMBOL_GPL(fsnotify_add_mark);
int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
struct inode *inode, struct vfsmount *mnt, int allow_dups)
@@ -495,41 +498,21 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
atomic_set(&mark->refcnt, 1);
mark->free_mark = free_mark;
}
-EXPORT_SYMBOL_GPL(fsnotify_init_mark);
-static int fsnotify_mark_destroy(void *ignored)
+static void fsnotify_mark_destroy(struct work_struct *work)
{
struct fsnotify_mark *mark, *next;
struct list_head private_destroy_list;
- for (;;) {
- spin_lock(&destroy_lock);
- /* exchange the list head */
- list_replace_init(&destroy_list, &private_destroy_list);
- spin_unlock(&destroy_lock);
-
- synchronize_srcu(&fsnotify_mark_srcu);
+ spin_lock(&destroy_lock);
+ /* exchange the list head */
+ list_replace_init(&destroy_list, &private_destroy_list);
+ spin_unlock(&destroy_lock);
- list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
- list_del_init(&mark->g_list);
- fsnotify_put_mark(mark);
- }
+ synchronize_srcu(&fsnotify_mark_srcu);
- wait_event_interruptible(destroy_waitq, !list_empty(&destroy_list));
+ list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
+ list_del_init(&mark->g_list);
+ fsnotify_put_mark(mark);
}
-
- return 0;
-}
-
-static int __init fsnotify_mark_init(void)
-{
- struct task_struct *thread;
-
- thread = kthread_run(fsnotify_mark_destroy, NULL,
- "fsnotify_mark");
- if (IS_ERR(thread))
- panic("unable to start fsnotify mark destruction thread.");
-
- return 0;
}
-device_initcall(fsnotify_mark_init);
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 9e38dafa3..b2eff5816 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1509,7 +1509,7 @@ static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
err = filemap_write_and_wait_range(vi->i_mapping, start, end);
if (err)
return err;
- mutex_lock(&vi->i_mutex);
+ inode_lock(vi);
BUG_ON(!S_ISDIR(vi->i_mode));
/* If the bitmap attribute inode is in memory sync it, too. */
@@ -1532,7 +1532,7 @@ static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
else
ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error "
"%u.", datasync ? "data" : "", vi->i_ino, -ret);
- mutex_unlock(&vi->i_mutex);
+ inode_unlock(vi);
return ret;
}
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 9d383e5ef..bed4d427d 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1944,14 +1944,14 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
ssize_t written = 0;
ssize_t err;
- mutex_lock(&vi->i_mutex);
+ inode_lock(vi);
/* We can write back this queue in page reclaim. */
current->backing_dev_info = inode_to_bdi(vi);
err = ntfs_prepare_file_for_write(iocb, from);
if (iov_iter_count(from) && !err)
written = ntfs_perform_write(file, from, iocb->ki_pos);
current->backing_dev_info = NULL;
- mutex_unlock(&vi->i_mutex);
+ inode_unlock(vi);
if (likely(written > 0)) {
err = generic_write_sync(file, iocb->ki_pos, written);
if (err < 0)
@@ -1996,7 +1996,7 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
err = filemap_write_and_wait_range(vi->i_mapping, start, end);
if (err)
return err;
- mutex_lock(&vi->i_mutex);
+ inode_lock(vi);
BUG_ON(S_ISDIR(vi->i_mode));
if (!datasync || !NInoNonResident(NTFS_I(vi)))
@@ -2015,7 +2015,7 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
else
ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error "
"%u.", datasync ? "data" : "", vi->i_ino, -ret);
- mutex_unlock(&vi->i_mutex);
+ inode_unlock(vi);
return ret;
}
diff --git a/fs/ntfs/quota.c b/fs/ntfs/quota.c
index d80e3315c..9793e68ba 100644
--- a/fs/ntfs/quota.c
+++ b/fs/ntfs/quota.c
@@ -48,7 +48,7 @@ bool ntfs_mark_quotas_out_of_date(ntfs_volume *vol)
ntfs_error(vol->sb, "Quota inodes are not open.");
return false;
}
- mutex_lock(&vol->quota_q_ino->i_mutex);
+ inode_lock(vol->quota_q_ino);
ictx = ntfs_index_ctx_get(NTFS_I(vol->quota_q_ino));
if (!ictx) {
ntfs_error(vol->sb, "Failed to get index context.");
@@ -98,7 +98,7 @@ bool ntfs_mark_quotas_out_of_date(ntfs_volume *vol)
ntfs_index_entry_mark_dirty(ictx);
set_done:
ntfs_index_ctx_put(ictx);
- mutex_unlock(&vol->quota_q_ino->i_mutex);
+ inode_unlock(vol->quota_q_ino);
/*
* We set the flag so we do not try to mark the quotas out of date
* again on remount.
@@ -110,7 +110,7 @@ done:
err_out:
if (ictx)
ntfs_index_ctx_put(ictx);
- mutex_unlock(&vol->quota_q_ino->i_mutex);
+ inode_unlock(vol->quota_q_ino);
return false;
}
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index d1a853585..1b38abdaa 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1284,10 +1284,10 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
* Find the inode number for the hibernation file by looking up the
* filename hiberfil.sys in the root directory.
*/
- mutex_lock(&vol->root_ino->i_mutex);
+ inode_lock(vol->root_ino);
mref = ntfs_lookup_inode_by_name(NTFS_I(vol->root_ino), hiberfil, 12,
&name);
- mutex_unlock(&vol->root_ino->i_mutex);
+ inode_unlock(vol->root_ino);
if (IS_ERR_MREF(mref)) {
ret = MREF_ERR(mref);
/* If the file does not exist, Windows is not hibernated. */
@@ -1377,10 +1377,10 @@ static bool load_and_init_quota(ntfs_volume *vol)
* Find the inode number for the quota file by looking up the filename
* $Quota in the extended system files directory $Extend.
*/
- mutex_lock(&vol->extend_ino->i_mutex);
+ inode_lock(vol->extend_ino);
mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), Quota, 6,
&name);
- mutex_unlock(&vol->extend_ino->i_mutex);
+ inode_unlock(vol->extend_ino);
if (IS_ERR_MREF(mref)) {
/*
* If the file does not exist, quotas are disabled and have
@@ -1460,10 +1460,10 @@ static bool load_and_init_usnjrnl(ntfs_volume *vol)
* Find the inode number for the transaction log file by looking up the
* filename $UsnJrnl in the extended system files directory $Extend.
*/
- mutex_lock(&vol->extend_ino->i_mutex);
+ inode_lock(vol->extend_ino);
mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), UsnJrnl, 8,
&name);
- mutex_unlock(&vol->extend_ino->i_mutex);
+ inode_unlock(vol->extend_ino);
if (IS_ERR_MREF(mref)) {
/*
* If the file does not exist, transaction logging is disabled,
@@ -3139,8 +3139,8 @@ static int __init init_ntfs_fs(void)
ntfs_big_inode_cache = kmem_cache_create(ntfs_big_inode_cache_name,
sizeof(big_ntfs_inode), 0,
- SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
- ntfs_big_inode_init_once);
+ SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
+ SLAB_ACCOUNT, ntfs_big_inode_init_once);
if (!ntfs_big_inode_cache) {
pr_crit("Failed to create %s!\n", ntfs_big_inode_cache_name);
goto big_inode_err_out;
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 86181d652..d002579c6 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -164,7 +164,7 @@ static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
struct ocfs2_extent_rec *rec);
static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et);
static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et);
-static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = {
+static const struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = {
.eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk,
.eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk,
.eo_update_clusters = ocfs2_dinode_update_clusters,
@@ -286,7 +286,7 @@ static void ocfs2_xattr_value_update_clusters(struct ocfs2_extent_tree *et,
le32_add_cpu(&vb->vb_xv->xr_clusters, clusters);
}
-static struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = {
+static const struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = {
.eo_set_last_eb_blk = ocfs2_xattr_value_set_last_eb_blk,
.eo_get_last_eb_blk = ocfs2_xattr_value_get_last_eb_blk,
.eo_update_clusters = ocfs2_xattr_value_update_clusters,
@@ -332,7 +332,7 @@ static void ocfs2_xattr_tree_update_clusters(struct ocfs2_extent_tree *et,
le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, clusters);
}
-static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = {
+static const struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = {
.eo_set_last_eb_blk = ocfs2_xattr_tree_set_last_eb_blk,
.eo_get_last_eb_blk = ocfs2_xattr_tree_get_last_eb_blk,
.eo_update_clusters = ocfs2_xattr_tree_update_clusters,
@@ -379,7 +379,7 @@ static void ocfs2_dx_root_fill_root_el(struct ocfs2_extent_tree *et)
et->et_root_el = &dx_root->dr_list;
}
-static struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = {
+static const struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = {
.eo_set_last_eb_blk = ocfs2_dx_root_set_last_eb_blk,
.eo_get_last_eb_blk = ocfs2_dx_root_get_last_eb_blk,
.eo_update_clusters = ocfs2_dx_root_update_clusters,
@@ -425,7 +425,7 @@ ocfs2_refcount_tree_extent_contig(struct ocfs2_extent_tree *et,
return CONTIG_NONE;
}
-static struct ocfs2_extent_tree_operations ocfs2_refcount_tree_et_ops = {
+static const struct ocfs2_extent_tree_operations ocfs2_refcount_tree_et_ops = {
.eo_set_last_eb_blk = ocfs2_refcount_tree_set_last_eb_blk,
.eo_get_last_eb_blk = ocfs2_refcount_tree_get_last_eb_blk,
.eo_update_clusters = ocfs2_refcount_tree_update_clusters,
@@ -438,7 +438,7 @@ static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
struct buffer_head *bh,
ocfs2_journal_access_func access,
void *obj,
- struct ocfs2_extent_tree_operations *ops)
+ const struct ocfs2_extent_tree_operations *ops)
{
et->et_ops = ops;
et->et_root_bh = bh;
@@ -5719,7 +5719,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
goto bail;
}
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
if (ocfs2_truncate_log_needs_flush(osb)) {
ret = __ocfs2_flush_truncate_log(osb);
@@ -5776,7 +5776,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
out_commit:
ocfs2_commit_trans(osb, handle);
out:
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
bail:
if (meta_ac)
ocfs2_free_alloc_context(meta_ac);
@@ -5832,7 +5832,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
struct ocfs2_dinode *di;
struct ocfs2_truncate_log *tl;
- BUG_ON(mutex_trylock(&tl_inode->i_mutex));
+ BUG_ON(inode_trylock(tl_inode));
start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
@@ -5980,7 +5980,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
struct ocfs2_dinode *di;
struct ocfs2_truncate_log *tl;
- BUG_ON(mutex_trylock(&tl_inode->i_mutex));
+ BUG_ON(inode_trylock(tl_inode));
di = (struct ocfs2_dinode *) tl_bh->b_data;
@@ -6008,7 +6008,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
goto out;
}
- mutex_lock(&data_alloc_inode->i_mutex);
+ inode_lock(data_alloc_inode);
status = ocfs2_inode_lock(data_alloc_inode, &data_alloc_bh, 1);
if (status < 0) {
@@ -6035,7 +6035,7 @@ out_unlock:
ocfs2_inode_unlock(data_alloc_inode, 1);
out_mutex:
- mutex_unlock(&data_alloc_inode->i_mutex);
+ inode_unlock(data_alloc_inode);
iput(data_alloc_inode);
out:
@@ -6047,9 +6047,9 @@ int ocfs2_flush_truncate_log(struct ocfs2_super *osb)
int status;
struct inode *tl_inode = osb->osb_tl_inode;
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
status = __ocfs2_flush_truncate_log(osb);
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
return status;
}
@@ -6174,8 +6174,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
}
bail:
- if (tl_inode)
- iput(tl_inode);
+ iput(tl_inode);
brelse(tl_bh);
if (status < 0) {
@@ -6209,7 +6208,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
(unsigned long long)le64_to_cpu(tl_copy->i_blkno),
num_recs);
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
for(i = 0; i < num_recs; i++) {
if (ocfs2_truncate_log_needs_flush(osb)) {
status = __ocfs2_flush_truncate_log(osb);
@@ -6240,7 +6239,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
}
bail_up:
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
return status;
}
@@ -6347,7 +6346,7 @@ static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
goto out;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = ocfs2_inode_lock(inode, &di_bh, 1);
if (ret) {
@@ -6396,7 +6395,7 @@ out_unlock:
ocfs2_inode_unlock(inode, 1);
brelse(di_bh);
out_mutex:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
iput(inode);
out:
while(head) {
@@ -6440,7 +6439,7 @@ static int ocfs2_free_cached_clusters(struct ocfs2_super *osb,
handle_t *handle;
int ret = 0;
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
while (head) {
if (ocfs2_truncate_log_needs_flush(osb)) {
@@ -6472,7 +6471,7 @@ static int ocfs2_free_cached_clusters(struct ocfs2_super *osb,
}
}
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
while (head) {
/* Premature exit may have left some dangling items. */
@@ -7356,7 +7355,7 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
goto out;
}
- mutex_lock(&main_bm_inode->i_mutex);
+ inode_lock(main_bm_inode);
ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 0);
if (ret < 0) {
@@ -7423,7 +7422,7 @@ out_unlock:
ocfs2_inode_unlock(main_bm_inode, 0);
brelse(main_bm_bh);
out_mutex:
- mutex_unlock(&main_bm_inode->i_mutex);
+ inode_unlock(main_bm_inode);
iput(main_bm_inode);
out:
return ret;
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index fb09b97db..f3dc1b0df 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -54,7 +54,7 @@
*/
struct ocfs2_extent_tree_operations;
struct ocfs2_extent_tree {
- struct ocfs2_extent_tree_operations *et_ops;
+ const struct ocfs2_extent_tree_operations *et_ops;
struct buffer_head *et_root_bh;
struct ocfs2_extent_list *et_root_el;
struct ocfs2_caching_info *et_ci;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index e6795c7c7..cda0361e9 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2047,9 +2047,9 @@ static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
int ret = 0;
unsigned int truncated_clusters;
- mutex_lock(&osb->osb_tl_inode->i_mutex);
+ inode_lock(osb->osb_tl_inode);
truncated_clusters = osb->truncated_clusters;
- mutex_unlock(&osb->osb_tl_inode->i_mutex);
+ inode_unlock(osb->osb_tl_inode);
/*
* Check whether we can succeed in allocating if we free
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 709fbbd44..a76b9ea77 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1254,15 +1254,15 @@ static const struct file_operations o2hb_debug_fops = {
void o2hb_exit(void)
{
- kfree(o2hb_db_livenodes);
- kfree(o2hb_db_liveregions);
- kfree(o2hb_db_quorumregions);
- kfree(o2hb_db_failedregions);
debugfs_remove(o2hb_debug_failedregions);
debugfs_remove(o2hb_debug_quorumregions);
debugfs_remove(o2hb_debug_liveregions);
debugfs_remove(o2hb_debug_livenodes);
debugfs_remove(o2hb_debug_dir);
+ kfree(o2hb_db_livenodes);
+ kfree(o2hb_db_liveregions);
+ kfree(o2hb_db_quorumregions);
+ kfree(o2hb_db_failedregions);
}
static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir,
@@ -1438,13 +1438,15 @@ static void o2hb_region_release(struct config_item *item)
kfree(reg->hr_slots);
- kfree(reg->hr_db_regnum);
- kfree(reg->hr_db_livenodes);
debugfs_remove(reg->hr_debug_livenodes);
debugfs_remove(reg->hr_debug_regnum);
debugfs_remove(reg->hr_debug_elapsed_time);
debugfs_remove(reg->hr_debug_pinned);
debugfs_remove(reg->hr_debug_dir);
+ kfree(reg->hr_db_livenodes);
+ kfree(reg->hr_db_regnum);
+ kfree(reg->hr_db_elapsed_time); /* hr_db_*, not the hr_debug_* entry removed above */
+ kfree(reg->hr_db_pinned); /* hr_db_*, not the hr_debug_* entry removed above */
spin_lock(&o2hb_live_lock);
list_del(&reg->hr_all_item);
@@ -1780,8 +1782,8 @@ static ssize_t o2hb_region_dev_store(struct config_item *item,
}
++live_threshold;
atomic_set(&reg->hr_steady_iterations, live_threshold);
- /* unsteady_iterations is double the steady_iterations */
- atomic_set(&reg->hr_unsteady_iterations, (live_threshold << 1));
+ /* unsteady_iterations is triple the steady_iterations */
+ atomic_set(&reg->hr_unsteady_iterations, (live_threshold * 3));
hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s",
reg->hr_item.ci_name);
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 72afdca3c..ebe543894 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -757,7 +757,7 @@ int o2nm_depend_item(struct config_item *item)
void o2nm_undepend_item(struct config_item *item)
{
- configfs_undepend_item(&o2nm_cluster_group.cs_subsys, item);
+ configfs_undepend_item(item);
}
int o2nm_depend_this_node(void)
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index ffecf89c8..e1adf285f 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -4361,7 +4361,7 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
mlog_errno(ret);
goto out;
}
- mutex_lock(&dx_alloc_inode->i_mutex);
+ inode_lock(dx_alloc_inode);
ret = ocfs2_inode_lock(dx_alloc_inode, &dx_alloc_bh, 1);
if (ret) {
@@ -4410,7 +4410,7 @@ out_unlock:
ocfs2_inode_unlock(dx_alloc_inode, 1);
out_mutex:
- mutex_unlock(&dx_alloc_inode->i_mutex);
+ inode_unlock(dx_alloc_inode);
brelse(dx_alloc_bh);
out:
iput(dx_alloc_inode);
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index e88ccf8c8..68c607e63 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -376,17 +376,6 @@ struct dlm_lock
lksb_kernel_allocated:1;
};
-
-#define DLM_LKSB_UNUSED1 0x01
-#define DLM_LKSB_PUT_LVB 0x02
-#define DLM_LKSB_GET_LVB 0x04
-#define DLM_LKSB_UNUSED2 0x08
-#define DLM_LKSB_UNUSED3 0x10
-#define DLM_LKSB_UNUSED4 0x20
-#define DLM_LKSB_UNUSED5 0x40
-#define DLM_LKSB_UNUSED6 0x80
-
-
enum dlm_lockres_list {
DLM_GRANTED_LIST = 0,
DLM_CONVERTING_LIST = 1,
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 4e2162b35..9477d6e1d 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2388,8 +2388,8 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
spin_lock(&res->spinlock);
BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF);
+ __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
if (test_bit(node, res->refmap)) {
- __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
dlm_lockres_clear_refmap_bit(dlm, res, node);
cleared = 1;
}
@@ -2549,7 +2549,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
}
fail:
- if (oldmle) {
+ if (ret != -EEXIST && oldmle) {
/* master is known, detach if not already detached */
dlm_mle_detach_hb_events(dlm, oldmle);
dlm_put_mle(oldmle);
@@ -3045,7 +3045,7 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data,
int ret = 0;
if (!dlm_grab(dlm))
- return -EINVAL;
+ return 0;
name = migrate->name;
namelen = migrate->namelen;
@@ -3136,7 +3136,8 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
mlog(0, "tried to migrate %.*s, but some "
"process beat me to it\n",
namelen, name);
- ret = -EEXIST;
+ spin_unlock(&tmp->spinlock);
+ return -EEXIST;
} else {
/* bad. 2 NODES are trying to migrate! */
mlog(ML_ERROR, "migration error mle: "
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 42f0cae93..b94a425f0 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1373,6 +1373,7 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
char *buf = NULL;
struct dlm_work_item *item = NULL;
struct dlm_lock_resource *res = NULL;
+ unsigned int hash;
if (!dlm_grab(dlm))
return -EINVAL;
@@ -1400,7 +1401,10 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
/* lookup the lock to see if we have a secondary queue for this
* already... just add the locks in and this will have its owner
* and RECOVERY flag changed when it completes. */
- res = dlm_lookup_lockres(dlm, mres->lockname, mres->lockname_len);
+ hash = dlm_lockid_hash(mres->lockname, mres->lockname_len);
+ spin_lock(&dlm->spinlock);
+ res = __dlm_lookup_lockres(dlm, mres->lockname, mres->lockname_len,
+ hash);
if (res) {
/* this will get a ref on res */
/* mark it as recovering/migrating and hash it */
@@ -1421,13 +1425,16 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
mres->lockname_len, mres->lockname);
ret = -EFAULT;
spin_unlock(&res->spinlock);
+ spin_unlock(&dlm->spinlock);
dlm_lockres_put(res);
goto leave;
}
res->state |= DLM_LOCK_RES_MIGRATING;
}
spin_unlock(&res->spinlock);
+ spin_unlock(&dlm->spinlock);
} else {
+ spin_unlock(&dlm->spinlock);
/* need to allocate, just like if it was
* mastered here normally */
res = dlm_new_lockres(dlm, mres->lockname, mres->lockname_len);
@@ -2452,11 +2459,7 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx)
* perhaps later we can genericize this for other waiters. */
wake_up(&dlm->migration_wq);
- if (test_bit(idx, dlm->recovery_map))
- mlog(0, "domain %s, node %u already added "
- "to recovery map!\n", dlm->name, idx);
- else
- set_bit(idx, dlm->recovery_map);
+ set_bit(idx, dlm->recovery_map);
}
void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data)
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 2e3c9dbab..1082b2c30 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -421,7 +421,7 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data,
}
if (!dlm_grab(dlm))
- return DLM_REJECTED;
+ return DLM_FORWARD;
mlog_bug_on_msg(!dlm_domain_fully_joined(dlm),
"Domain %s not fully joined!\n", dlm->name);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index b5cf27dcb..03768bb3a 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -638,7 +638,7 @@ static int __init init_dlmfs_fs(void)
dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache",
sizeof(struct dlmfs_inode_private),
0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
dlmfs_init_once);
if (!dlmfs_inode_cache) {
status = -ENOMEM;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index b002acf50..474e57f83 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2438,12 +2438,6 @@ bail:
* done this we have to return AOP_TRUNCATED_PAGE so the aop method
* that called us can bubble that back up into the VFS who will then
* immediately retry the aop call.
- *
- * We do a blocking lock and immediate unlock before returning, though, so that
- * the lock has a great chance of being cached on this node by the time the VFS
- * calls back to retry the aop. This has a potential to livelock as nodes
- * ping locks back and forth, but that's a risk we're willing to take to avoid
- * the lock inversion simply.
*/
int ocfs2_inode_lock_with_page(struct inode *inode,
struct buffer_head **ret_bh,
@@ -2455,8 +2449,6 @@ int ocfs2_inode_lock_with_page(struct inode *inode,
ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
if (ret == -EAGAIN) {
unlock_page(page);
- if (ocfs2_inode_lock(inode, ret_bh, ex) == 0)
- ocfs2_inode_unlock(inode, ex);
ret = AOP_TRUNCATED_PAGE;
}
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 0e5b4515f..7cb38fdca 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1302,6 +1302,14 @@ int ocfs2_getattr(struct vfsmount *mnt,
}
generic_fillattr(inode, stat);
+ /*
+ * If there is inline data in the inode, the inode will normally not
+ * have data blocks allocated (it may have an external xattr block).
+ * Report at least one sector for such files, so tools like tar, rsync,
+ * others don't incorrectly think the file is completely sparse.
+ */
+ if (unlikely(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))
+ stat->blocks += (stat->size + 511)>>9;
/* We set the blksize from the cluster size for performance */
stat->blksize = osb->s_clustersize;
@@ -1864,7 +1872,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
return -EROFS;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* This prevents concurrent writes on other nodes
@@ -1983,7 +1991,7 @@ out_rw_unlock:
ocfs2_rw_unlock(inode, 1);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
@@ -2291,7 +2299,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
appending = iocb->ki_flags & IOCB_APPEND ? 1 : 0;
direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
relock:
/*
@@ -2427,7 +2435,7 @@ out:
ocfs2_rw_unlock(inode, rw_level);
out_mutex:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (written)
ret = written;
@@ -2539,7 +2547,7 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
struct inode *inode = file->f_mapping->host;
int ret = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
switch (whence) {
case SEEK_SET:
@@ -2577,7 +2585,7 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (ret)
return ret;
return offset;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 8f87e05ee..36294446d 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -361,6 +361,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
break;
case S_IFLNK:
inode->i_op = &ocfs2_symlink_inode_operations;
+ inode_nohighmem(inode);
i_size_write(inode, le64_to_cpu(fe->i_size));
break;
default:
@@ -629,10 +630,10 @@ static int ocfs2_remove_inode(struct inode *inode,
goto bail;
}
- mutex_lock(&inode_alloc_inode->i_mutex);
+ inode_lock(inode_alloc_inode);
status = ocfs2_inode_lock(inode_alloc_inode, &inode_alloc_bh, 1);
if (status < 0) {
- mutex_unlock(&inode_alloc_inode->i_mutex);
+ inode_unlock(inode_alloc_inode);
mlog_errno(status);
goto bail;
@@ -679,7 +680,7 @@ bail_commit:
ocfs2_commit_trans(osb, handle);
bail_unlock:
ocfs2_inode_unlock(inode_alloc_inode, 1);
- mutex_unlock(&inode_alloc_inode->i_mutex);
+ inode_unlock(inode_alloc_inode);
brelse(inode_alloc_bh);
bail:
iput(inode_alloc_inode);
@@ -750,10 +751,10 @@ static int ocfs2_wipe_inode(struct inode *inode,
/* Lock the orphan dir. The lock will be held for the entire
* delete_inode operation. We do this now to avoid races with
* recovery completion on other nodes. */
- mutex_lock(&orphan_dir_inode->i_mutex);
+ inode_lock(orphan_dir_inode);
status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
if (status < 0) {
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
mlog_errno(status);
goto bail;
@@ -802,7 +803,7 @@ bail_unlock_dir:
return status;
ocfs2_inode_unlock(orphan_dir_inode, 1);
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
brelse(orphan_dir_bh);
bail:
iput(orphan_dir_inode);
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 3cb097ccc..4506ec5ec 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -86,7 +86,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
unsigned oldflags;
int status;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
status = ocfs2_inode_lock(inode, &bh, 1);
if (status < 0) {
@@ -135,7 +135,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
bail_unlock:
ocfs2_inode_unlock(inode, 1);
bail:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
brelse(bh);
@@ -287,7 +287,7 @@ static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb,
struct ocfs2_dinode *dinode_alloc = NULL;
if (inode_alloc)
- mutex_lock(&inode_alloc->i_mutex);
+ inode_lock(inode_alloc);
if (o2info_coherent(&fi->ifi_req)) {
status = ocfs2_inode_lock(inode_alloc, &bh, 0);
@@ -317,7 +317,7 @@ bail:
ocfs2_inode_unlock(inode_alloc, 0);
if (inode_alloc)
- mutex_unlock(&inode_alloc->i_mutex);
+ inode_unlock(inode_alloc);
brelse(bh);
@@ -547,7 +547,7 @@ static int ocfs2_info_freefrag_scan_bitmap(struct ocfs2_super *osb,
struct ocfs2_dinode *gb_dinode = NULL;
if (gb_inode)
- mutex_lock(&gb_inode->i_mutex);
+ inode_lock(gb_inode);
if (o2info_coherent(&ffg->iff_req)) {
status = ocfs2_inode_lock(gb_inode, &bh, 0);
@@ -604,11 +604,9 @@ bail:
ocfs2_inode_unlock(gb_inode, 0);
if (gb_inode)
- mutex_unlock(&gb_inode->i_mutex);
-
- if (gb_inode)
- iput(gb_inode);
+ inode_unlock(gb_inode);
+ iput(gb_inode);
brelse(bh);
return status;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 13534f4fe..61b833b72 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1042,8 +1042,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
// up_write(&journal->j_trans_barrier);
done:
- if (inode)
- iput(inode);
+ iput(inode);
}
static void ocfs2_clear_journal_error(struct super_block *sb,
@@ -1687,9 +1686,7 @@ done:
if (got_lock)
ocfs2_inode_unlock(inode, 1);
- if (inode)
- iput(inode);
-
+ iput(inode);
brelse(bh);
return status;
@@ -1796,8 +1793,7 @@ static int ocfs2_trylock_journal(struct ocfs2_super *osb,
ocfs2_inode_unlock(inode, 1);
bail:
- if (inode)
- iput(inode);
+ iput(inode);
return status;
}
@@ -2092,7 +2088,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
return status;
}
- mutex_lock(&orphan_dir_inode->i_mutex);
+ inode_lock(orphan_dir_inode);
status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
if (status < 0) {
mlog_errno(status);
@@ -2110,7 +2106,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
out_cluster:
ocfs2_inode_unlock(orphan_dir_inode, 0);
out:
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
iput(orphan_dir_inode);
return status;
}
@@ -2200,7 +2196,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
oi->ip_next_orphan = NULL;
if (oi->ip_flags & OCFS2_INODE_DIO_ORPHAN_ENTRY) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = ocfs2_rw_lock(inode, 1);
if (ret < 0) {
mlog_errno(ret);
@@ -2239,7 +2235,7 @@ unlock_inode:
unlock_rw:
ocfs2_rw_unlock(inode, 1);
unlock_mutex:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/* clear dio flag in ocfs2_inode_info */
oi->ip_flags &= ~OCFS2_INODE_DIO_ORPHAN_ENTRY;
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 0a4457fb0..7d62c43a2 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -358,8 +358,7 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
bail:
if (status < 0)
brelse(alloc_bh);
- if (inode)
- iput(inode);
+ iput(inode);
trace_ocfs2_load_local_alloc(osb->local_alloc_bits);
@@ -415,7 +414,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
goto out;
}
- mutex_lock(&main_bm_inode->i_mutex);
+ inode_lock(main_bm_inode);
status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
if (status < 0) {
@@ -469,12 +468,11 @@ out_unlock:
ocfs2_inode_unlock(main_bm_inode, 1);
out_mutex:
- mutex_unlock(&main_bm_inode->i_mutex);
+ inode_unlock(main_bm_inode);
iput(main_bm_inode);
out:
- if (local_alloc_inode)
- iput(local_alloc_inode);
+ iput(local_alloc_inode);
kfree(alloc_copy);
}
@@ -508,7 +506,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
goto bail;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
status = ocfs2_read_inode_block_full(inode, &alloc_bh,
OCFS2_BH_IGNORE_CACHE);
@@ -541,7 +539,7 @@ bail:
brelse(alloc_bh);
if (inode) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
iput(inode);
}
@@ -573,7 +571,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
goto out;
}
- mutex_lock(&main_bm_inode->i_mutex);
+ inode_lock(main_bm_inode);
status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
if (status < 0) {
@@ -603,7 +601,7 @@ out_unlock:
ocfs2_inode_unlock(main_bm_inode, 1);
out_mutex:
- mutex_unlock(&main_bm_inode->i_mutex);
+ inode_unlock(main_bm_inode);
brelse(main_bm_bh);
@@ -645,7 +643,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
goto bail;
}
- mutex_lock(&local_alloc_inode->i_mutex);
+ inode_lock(local_alloc_inode);
/*
* We must double check state and allocator bits because
@@ -711,7 +709,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
status = 0;
bail:
if (status < 0 && local_alloc_inode) {
- mutex_unlock(&local_alloc_inode->i_mutex);
+ inode_unlock(local_alloc_inode);
iput(local_alloc_inode);
}
@@ -1327,9 +1325,7 @@ bail:
brelse(main_bm_bh);
- if (main_bm_inode)
- iput(main_bm_inode);
-
+ iput(main_bm_inode);
kfree(alloc_copy);
if (ac)
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 9581d190f..77ebc2bc1 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -147,6 +147,10 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
ret = ocfs2_inode_lock(inode, &di_bh, 1);
if (ret < 0) {
mlog_errno(ret);
+ if (ret == -ENOMEM)
+ ret = VM_FAULT_OOM;
+ else
+ ret = VM_FAULT_SIGBUS;
goto out;
}
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 124471d26..e3d05d990 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -276,7 +276,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
* context->data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
*/
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
if (ocfs2_truncate_log_needs_flush(osb)) {
ret = __ocfs2_flush_truncate_log(osb);
@@ -338,7 +338,7 @@ out_commit:
ocfs2_commit_trans(osb, handle);
out_unlock_mutex:
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
if (context->data_ac) {
ocfs2_free_alloc_context(context->data_ac);
@@ -632,7 +632,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
goto out;
}
- mutex_lock(&gb_inode->i_mutex);
+ inode_lock(gb_inode);
ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1);
if (ret) {
@@ -640,7 +640,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
goto out_unlock_gb_mutex;
}
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
handle = ocfs2_start_trans(osb, credits);
if (IS_ERR(handle)) {
@@ -708,11 +708,11 @@ out_commit:
brelse(gd_bh);
out_unlock_tl_inode:
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
ocfs2_inode_unlock(gb_inode, 1);
out_unlock_gb_mutex:
- mutex_unlock(&gb_inode->i_mutex);
+ inode_unlock(gb_inode);
brelse(gb_bh);
iput(gb_inode);
@@ -905,7 +905,7 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
return -EROFS;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* This prevents concurrent writes from other nodes
@@ -969,7 +969,7 @@ out_inode_unlock:
out_rw_unlock:
ocfs2_rw_unlock(inode, 1);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return status;
}
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 3123408da..6b3e87189 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1045,7 +1045,7 @@ leave:
if (orphan_dir) {
/* This was locked for us in ocfs2_prepare_orphan_dir() */
ocfs2_inode_unlock(orphan_dir, 1);
- mutex_unlock(&orphan_dir->i_mutex);
+ inode_unlock(orphan_dir);
iput(orphan_dir);
}
@@ -1664,7 +1664,7 @@ bail:
if (orphan_dir) {
/* This was locked for us in ocfs2_prepare_orphan_dir() */
ocfs2_inode_unlock(orphan_dir, 1);
- mutex_unlock(&orphan_dir->i_mutex);
+ inode_unlock(orphan_dir);
iput(orphan_dir);
}
@@ -1683,8 +1683,7 @@ bail:
if (new_inode)
sync_mapping_buffers(old_inode->i_mapping);
- if (new_inode)
- iput(new_inode);
+ iput(new_inode);
ocfs2_free_dir_lookup_result(&target_lookup_res);
ocfs2_free_dir_lookup_result(&old_entry_lookup);
@@ -1958,6 +1957,7 @@ static int ocfs2_symlink(struct inode *dir,
inode->i_rdev = 0;
newsize = l - 1;
inode->i_op = &ocfs2_symlink_inode_operations;
+ inode_nohighmem(inode);
if (l > ocfs2_fast_symlink_chars(sb)) {
u32 offset = 0;
@@ -2121,11 +2121,11 @@ static int ocfs2_lookup_lock_orphan_dir(struct ocfs2_super *osb,
return ret;
}
- mutex_lock(&orphan_dir_inode->i_mutex);
+ inode_lock(orphan_dir_inode);
ret = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
if (ret < 0) {
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
iput(orphan_dir_inode);
mlog_errno(ret);
@@ -2226,7 +2226,7 @@ out:
if (ret) {
ocfs2_inode_unlock(orphan_dir_inode, 1);
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
iput(orphan_dir_inode);
}
@@ -2372,6 +2372,15 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
(unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno,
name, strlen(name));
+ status = ocfs2_journal_access_di(handle,
+ INODE_CACHE(orphan_dir_inode),
+ orphan_dir_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto leave;
+ }
+
/* find it's spot in the orphan directory */
status = ocfs2_find_entry(name, strlen(name), orphan_dir_inode,
&lookup);
@@ -2387,15 +2396,6 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
goto leave;
}
- status = ocfs2_journal_access_di(handle,
- INODE_CACHE(orphan_dir_inode),
- orphan_dir_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- mlog_errno(status);
- goto leave;
- }
-
/* do the i_nlink dance! :) */
orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
if (S_ISDIR(inode->i_mode))
@@ -2495,7 +2495,7 @@ out:
ocfs2_free_alloc_context(inode_ac);
/* Unroll orphan dir locking */
- mutex_unlock(&orphan_dir->i_mutex);
+ inode_unlock(orphan_dir);
ocfs2_inode_unlock(orphan_dir, 1);
iput(orphan_dir);
}
@@ -2602,7 +2602,7 @@ leave:
if (orphan_dir) {
/* This was locked for us in ocfs2_prepare_orphan_dir() */
ocfs2_inode_unlock(orphan_dir, 1);
- mutex_unlock(&orphan_dir->i_mutex);
+ inode_unlock(orphan_dir);
iput(orphan_dir);
}
@@ -2689,7 +2689,7 @@ int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
bail_unlock_orphan:
ocfs2_inode_unlock(orphan_dir_inode, 1);
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
iput(orphan_dir_inode);
ocfs2_free_dir_lookup_result(&orphan_insert);
@@ -2721,10 +2721,10 @@ int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb,
goto bail;
}
- mutex_lock(&orphan_dir_inode->i_mutex);
+ inode_lock(orphan_dir_inode);
status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
if (status < 0) {
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
iput(orphan_dir_inode);
mlog_errno(status);
goto bail;
@@ -2770,7 +2770,7 @@ bail_commit:
bail_unlock_orphan:
ocfs2_inode_unlock(orphan_dir_inode, 1);
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
brelse(orphan_dir_bh);
iput(orphan_dir_inode);
@@ -2834,12 +2834,12 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
goto leave;
}
- mutex_lock(&orphan_dir_inode->i_mutex);
+ inode_lock(orphan_dir_inode);
status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
if (status < 0) {
mlog_errno(status);
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
iput(orphan_dir_inode);
goto leave;
}
@@ -2901,7 +2901,7 @@ out_commit:
ocfs2_commit_trans(osb, handle);
orphan_unlock:
ocfs2_inode_unlock(orphan_dir_inode, 1);
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
iput(orphan_dir_inode);
leave:
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index b6d51333a..d153e6e31 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -82,7 +82,7 @@ struct ocfs2_quota_chunk {
extern struct kmem_cache *ocfs2_dquot_cachep;
extern struct kmem_cache *ocfs2_qf_chunk_cachep;
-extern struct qtree_fmt_operations ocfs2_global_ops;
+extern const struct qtree_fmt_operations ocfs2_global_ops;
struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery(
struct ocfs2_super *osb, int slot_num);
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index c93d67220..9c9dd30bc 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -123,7 +123,7 @@ static int ocfs2_global_is_id(void *dp, struct dquot *dquot)
dquot->dq_id);
}
-struct qtree_fmt_operations ocfs2_global_ops = {
+const struct qtree_fmt_operations ocfs2_global_ops = {
.mem2disk_dqblk = ocfs2_global_mem2diskdqb,
.disk2mem_dqblk = ocfs2_global_disk2memdqb,
.is_id = ocfs2_global_is_id,
@@ -308,7 +308,7 @@ int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
WARN_ON(bh != oinfo->dqi_gqi_bh);
spin_unlock(&dq_data_lock);
if (ex) {
- mutex_lock(&oinfo->dqi_gqinode->i_mutex);
+ inode_lock(oinfo->dqi_gqinode);
down_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
} else {
down_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
@@ -320,7 +320,7 @@ void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
{
if (ex) {
up_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
- mutex_unlock(&oinfo->dqi_gqinode->i_mutex);
+ inode_unlock(oinfo->dqi_gqinode);
} else {
up_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
}
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 252119860..3eff031aa 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -807,7 +807,7 @@ int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
mlog_errno(ret);
goto out;
}
- mutex_lock(&alloc_inode->i_mutex);
+ inode_lock(alloc_inode);
ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1);
if (ret) {
@@ -867,7 +867,7 @@ out_unlock:
}
out_mutex:
if (alloc_inode) {
- mutex_unlock(&alloc_inode->i_mutex);
+ inode_unlock(alloc_inode);
iput(alloc_inode);
}
out:
@@ -4197,7 +4197,7 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
goto out;
}
- mutex_lock_nested(&new_inode->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(new_inode, I_MUTEX_CHILD);
ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1,
OI_LS_REFLINK_TARGET);
if (ret) {
@@ -4231,7 +4231,7 @@ inode_unlock:
ocfs2_inode_unlock(new_inode, 1);
brelse(new_bh);
out_unlock:
- mutex_unlock(&new_inode->i_mutex);
+ inode_unlock(new_inode);
out:
if (!ret) {
ret = filemap_fdatawait(inode->i_mapping);
@@ -4402,11 +4402,11 @@ static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
return error;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
error = dquot_initialize(dir);
if (!error)
error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (!error)
fsnotify_create(dir, new_dentry);
return error;
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 79b802130..576b9a048 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -301,7 +301,7 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
goto out;
}
- mutex_lock(&main_bm_inode->i_mutex);
+ inode_lock(main_bm_inode);
ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
if (ret < 0) {
@@ -375,7 +375,7 @@ out_unlock:
ocfs2_inode_unlock(main_bm_inode, 1);
out_mutex:
- mutex_unlock(&main_bm_inode->i_mutex);
+ inode_unlock(main_bm_inode);
iput(main_bm_inode);
out:
@@ -486,7 +486,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
goto out;
}
- mutex_lock(&main_bm_inode->i_mutex);
+ inode_lock(main_bm_inode);
ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
if (ret < 0) {
@@ -590,7 +590,7 @@ out_unlock:
ocfs2_inode_unlock(main_bm_inode, 1);
out_mutex:
- mutex_unlock(&main_bm_inode->i_mutex);
+ inode_unlock(main_bm_inode);
iput(main_bm_inode);
out:
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index e78a203d4..1e0959214 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -322,8 +322,7 @@ static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si)
if (si == NULL)
return;
- if (si->si_inode)
- iput(si->si_inode);
+ iput(si->si_inode);
if (si->si_bh) {
for (i = 0; i < si->si_blocks; i++) {
if (si->si_bh[i]) {
@@ -503,8 +502,17 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
trace_ocfs2_find_slot(osb->slot_num);
status = ocfs2_update_disk_slot(osb, si, osb->slot_num);
- if (status < 0)
+ if (status < 0) {
mlog_errno(status);
+ /*
+ * if write block failed, invalidate slot to avoid overwrite
+ * slot during dismount in case another node rightly has mounted
+ */
+ spin_lock(&osb->osb_lock);
+ ocfs2_invalidate_slot(si, osb->slot_num);
+ osb->slot_num = OCFS2_INVALID_SLOT;
+ spin_unlock(&osb->osb_lock);
+ }
bail:
return status;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index fc6d25f6d..2f19aeec5 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -141,7 +141,7 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
if (ac->ac_which != OCFS2_AC_USE_LOCAL)
ocfs2_inode_unlock(inode, 1);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
iput(inode);
ac->ac_inode = NULL;
@@ -797,11 +797,11 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
return -EINVAL;
}
- mutex_lock(&alloc_inode->i_mutex);
+ inode_lock(alloc_inode);
status = ocfs2_inode_lock(alloc_inode, &bh, 1);
if (status < 0) {
- mutex_unlock(&alloc_inode->i_mutex);
+ inode_unlock(alloc_inode);
iput(alloc_inode);
mlog_errno(status);
@@ -2875,10 +2875,10 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
goto bail;
}
- mutex_lock(&inode_alloc_inode->i_mutex);
+ inode_lock(inode_alloc_inode);
status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0);
if (status < 0) {
- mutex_unlock(&inode_alloc_inode->i_mutex);
+ inode_unlock(inode_alloc_inode);
iput(inode_alloc_inode);
mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n",
(u32)suballoc_slot, status);
@@ -2891,7 +2891,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
ocfs2_inode_unlock(inode_alloc_inode, 0);
- mutex_unlock(&inode_alloc_inode->i_mutex);
+ inode_unlock(inode_alloc_inode);
iput(inode_alloc_inode);
brelse(alloc_bh);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 2de4c8a93..faa136509 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1280,6 +1280,8 @@ static int ocfs2_parse_options(struct super_block *sb,
int status, user_stack = 0;
char *p;
u32 tmp;
+ int token, option;
+ substring_t args[MAX_OPT_ARGS];
trace_ocfs2_parse_options(is_remount, options ? options : "(none)");
@@ -1298,9 +1300,6 @@ static int ocfs2_parse_options(struct super_block *sb,
}
while ((p = strsep(&options, ",")) != NULL) {
- int token, option;
- substring_t args[MAX_OPT_ARGS];
-
if (!*p)
continue;
@@ -1367,7 +1366,6 @@ static int ocfs2_parse_options(struct super_block *sb,
mopt->atime_quantum = option;
break;
case Opt_slot:
- option = 0;
if (match_int(&args[0], &option)) {
status = 0;
goto bail;
@@ -1376,7 +1374,6 @@ static int ocfs2_parse_options(struct super_block *sb,
mopt->slot = (s16)option;
break;
case Opt_commit:
- option = 0;
if (match_int(&args[0], &option)) {
status = 0;
goto bail;
@@ -1388,7 +1385,6 @@ static int ocfs2_parse_options(struct super_block *sb,
mopt->commit_interval = HZ * option;
break;
case Opt_localalloc:
- option = 0;
if (match_int(&args[0], &option)) {
status = 0;
goto bail;
@@ -1726,8 +1722,7 @@ static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
ocfs2_inode_unlock(inode, 0);
status = 0;
bail:
- if (inode)
- iput(inode);
+ iput(inode);
if (status)
mlog_errno(status);
@@ -1771,7 +1766,7 @@ static int ocfs2_initialize_mem_caches(void)
sizeof(struct ocfs2_inode_info),
0,
(SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
ocfs2_inode_init_once);
ocfs2_dquot_cachep = kmem_cache_create("ocfs2_dquot_cache",
sizeof(struct ocfs2_dquot),
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 66edce7ec..6c2a3e3c5 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -88,8 +88,7 @@ const struct address_space_operations ocfs2_fast_symlink_aops = {
const struct inode_operations ocfs2_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.getattr = ocfs2_getattr,
.setattr = ocfs2_setattr,
.setxattr = generic_setxattr,
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index e9164f098..7d3d979f5 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -544,8 +544,7 @@ static inline const char *ocfs2_xattr_prefix(int name_index)
if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
handler = ocfs2_xattr_handler_map[name_index];
-
- return handler ? handler->prefix : NULL;
+ return handler ? xattr_prefix(handler) : NULL;
}
static u32 ocfs2_xattr_name_hash(struct inode *inode,
@@ -884,14 +883,39 @@ static int ocfs2_xattr_value_truncate(struct inode *inode,
return ret;
}
-static int ocfs2_xattr_list_entry(char *buffer, size_t size,
- size_t *result, const char *prefix,
+static int ocfs2_xattr_list_entry(struct super_block *sb,
+ char *buffer, size_t size,
+ size_t *result, int type,
const char *name, int name_len)
{
char *p = buffer + *result;
- int prefix_len = strlen(prefix);
- int total_len = prefix_len + name_len + 1;
+ const char *prefix;
+ int prefix_len;
+ int total_len;
+ switch(type) {
+ case OCFS2_XATTR_INDEX_USER:
+ if (OCFS2_SB(sb)->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
+ return 0;
+ break;
+
+ case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS:
+ case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT:
+ if (!(sb->s_flags & MS_POSIXACL))
+ return 0;
+ break;
+
+ case OCFS2_XATTR_INDEX_TRUSTED:
+ if (!capable(CAP_SYS_ADMIN))
+ return 0;
+ break;
+ }
+
+ prefix = ocfs2_xattr_prefix(type);
+ if (!prefix)
+ return 0;
+ prefix_len = strlen(prefix);
+ total_len = prefix_len + name_len + 1;
*result += total_len;
/* we are just looking for how big our buffer needs to be */
@@ -914,23 +938,20 @@ static int ocfs2_xattr_list_entries(struct inode *inode,
{
size_t result = 0;
int i, type, ret;
- const char *prefix, *name;
+ const char *name;
for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
type = ocfs2_xattr_get_type(entry);
- prefix = ocfs2_xattr_prefix(type);
-
- if (prefix) {
- name = (const char *)header +
- le16_to_cpu(entry->xe_name_offset);
+ name = (const char *)header +
+ le16_to_cpu(entry->xe_name_offset);
- ret = ocfs2_xattr_list_entry(buffer, buffer_size,
- &result, prefix, name,
- entry->xe_name_len);
- if (ret)
- return ret;
- }
+ ret = ocfs2_xattr_list_entry(inode->i_sb,
+ buffer, buffer_size,
+ &result, type, name,
+ entry->xe_name_len);
+ if (ret)
+ return ret;
}
return result;
@@ -2503,7 +2524,7 @@ static int ocfs2_xattr_free_block(struct inode *inode,
mlog_errno(ret);
goto out;
}
- mutex_lock(&xb_alloc_inode->i_mutex);
+ inode_lock(xb_alloc_inode);
ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
if (ret < 0) {
@@ -2528,7 +2549,7 @@ out_unlock:
ocfs2_inode_unlock(xb_alloc_inode, 1);
brelse(xb_alloc_bh);
out_mutex:
- mutex_unlock(&xb_alloc_inode->i_mutex);
+ inode_unlock(xb_alloc_inode);
iput(xb_alloc_inode);
out:
brelse(blk_bh);
@@ -3598,17 +3619,17 @@ int ocfs2_xattr_set(struct inode *inode,
}
}
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
if (ocfs2_truncate_log_needs_flush(osb)) {
ret = __ocfs2_flush_truncate_log(osb);
if (ret < 0) {
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
mlog_errno(ret);
goto cleanup;
}
}
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
&xbs, &ctxt, ref_meta, &credits);
@@ -4033,32 +4054,30 @@ static int ocfs2_list_xattr_bucket(struct inode *inode,
int ret = 0, type;
struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
int i, block_off, new_offset;
- const char *prefix, *name;
+ const char *name;
for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
type = ocfs2_xattr_get_type(entry);
- prefix = ocfs2_xattr_prefix(type);
- if (prefix) {
- ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
- bucket_xh(bucket),
- i,
- &block_off,
- &new_offset);
- if (ret)
- break;
+ ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
+ bucket_xh(bucket),
+ i,
+ &block_off,
+ &new_offset);
+ if (ret)
+ break;
- name = (const char *)bucket_block(bucket, block_off) +
- new_offset;
- ret = ocfs2_xattr_list_entry(xl->buffer,
- xl->buffer_size,
- &xl->result,
- prefix, name,
- entry->xe_name_len);
- if (ret)
- break;
- }
+ name = (const char *)bucket_block(bucket, block_off) +
+ new_offset;
+ ret = ocfs2_xattr_list_entry(inode->i_sb,
+ xl->buffer,
+ xl->buffer_size,
+ &xl->result,
+ type, name,
+ entry->xe_name_len);
+ if (ret)
+ break;
}
return ret;
@@ -5441,7 +5460,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
return ret;
}
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
if (ocfs2_truncate_log_needs_flush(osb)) {
ret = __ocfs2_flush_truncate_log(osb);
@@ -5485,7 +5504,7 @@ out_commit:
out:
ocfs2_schedule_truncate_log_flush(osb, 1);
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
if (meta_ac)
ocfs2_free_alloc_context(meta_ac);
@@ -7226,31 +7245,14 @@ int ocfs2_init_security_and_acl(struct inode *dir,
leave:
return ret;
}
+
/*
* 'security' attributes support
*/
-static size_t ocfs2_xattr_security_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_size, const char *name,
- size_t name_len)
-{
- const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
- memcpy(list + prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
-}
-
static int ocfs2_xattr_security_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY,
name, buffer, size);
}
@@ -7259,9 +7261,6 @@ static int ocfs2_xattr_security_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
-
return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY,
name, value, size, flags);
}
@@ -7314,7 +7313,6 @@ int ocfs2_init_security_set(handle_t *handle,
const struct xattr_handler ocfs2_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
- .list = ocfs2_xattr_security_list,
.get = ocfs2_xattr_security_get,
.set = ocfs2_xattr_security_set,
};
@@ -7322,31 +7320,10 @@ const struct xattr_handler ocfs2_xattr_security_handler = {
/*
* 'trusted' attributes support
*/
-static size_t ocfs2_xattr_trusted_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_size, const char *name,
- size_t name_len)
-{
- const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (!capable(CAP_SYS_ADMIN))
- return 0;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
- memcpy(list + prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
-}
-
static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED,
name, buffer, size);
}
@@ -7355,16 +7332,12 @@ static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
-
return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED,
name, value, size, flags);
}
const struct xattr_handler ocfs2_xattr_trusted_handler = {
.prefix = XATTR_TRUSTED_PREFIX,
- .list = ocfs2_xattr_trusted_list,
.get = ocfs2_xattr_trusted_get,
.set = ocfs2_xattr_trusted_set,
};
@@ -7372,34 +7345,12 @@ const struct xattr_handler ocfs2_xattr_trusted_handler = {
/*
* 'user' attributes support
*/
-static size_t ocfs2_xattr_user_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_size, const char *name,
- size_t name_len)
-{
- const size_t prefix_len = XATTR_USER_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
- struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
-
- if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
- return 0;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_USER_PREFIX, prefix_len);
- memcpy(list + prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
-}
-
static int ocfs2_xattr_user_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
- if (strcmp(name, "") == 0)
- return -EINVAL;
if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
return -EOPNOTSUPP;
return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_USER, name,
@@ -7412,8 +7363,6 @@ static int ocfs2_xattr_user_set(const struct xattr_handler *handler,
{
struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
- if (strcmp(name, "") == 0)
- return -EINVAL;
if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
return -EOPNOTSUPP;
@@ -7423,7 +7372,6 @@ static int ocfs2_xattr_user_set(const struct xattr_handler *handler,
const struct xattr_handler ocfs2_xattr_user_handler = {
.prefix = XATTR_USER_PREFIX,
- .list = ocfs2_xattr_user_list,
.get = ocfs2_xattr_user_get,
.set = ocfs2_xattr_user_set,
};
diff --git a/fs/open.c b/fs/open.c
index 32dfe4f2a..16f561ea7 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -61,13 +61,12 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
if (ret)
newattrs.ia_valid |= ret | ATTR_FORCE;
- mutex_lock(&dentry->d_inode->i_mutex);
+ inode_lock(dentry->d_inode);
/* Note any delegations or leases have already been broken: */
ret = notify_change(dentry, &newattrs, NULL);
- mutex_unlock(&dentry->d_inode->i_mutex);
+ inode_unlock(dentry->d_inode);
return ret;
}
-EXPORT_SYMBOL_GPL(do_truncate);
long vfs_truncate(struct path *path, loff_t length)
{
@@ -514,7 +513,7 @@ static int chmod_common(struct path *path, umode_t mode)
if (error)
return error;
retry_deleg:
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
error = security_path_chmod(path, mode);
if (error)
goto out_unlock;
@@ -522,7 +521,7 @@ retry_deleg:
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
error = notify_change(path->dentry, &newattrs, &delegated_inode);
out_unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);
if (!error)
@@ -597,11 +596,11 @@ retry_deleg:
if (!S_ISDIR(inode->i_mode))
newattrs.ia_valid |=
ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
error = security_path_chown(path, uid, gid);
if (!error)
error = notify_change(path->dentry, &newattrs, &delegated_inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);
if (!error)
@@ -682,7 +681,6 @@ int open_check_o_direct(struct file *f)
}
return 0;
}
-EXPORT_SYMBOL_GPL(open_check_o_direct);
static int do_dentry_open(struct file *f,
struct inode *inode,
@@ -892,7 +890,7 @@ EXPORT_SYMBOL(dentry_open);
static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
{
int lookup_flags = 0;
- int acc_mode;
+ int acc_mode = ACC_MODE(flags);
if (flags & (O_CREAT | __O_TMPFILE))
op->mode = (mode & S_IALLUGO) | S_IFREG;
@@ -914,7 +912,6 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
if (flags & __O_TMPFILE) {
if ((flags & O_TMPFILE_MASK) != O_TMPFILE)
return -EINVAL;
- acc_mode = MAY_OPEN | ACC_MODE(flags);
if (!(acc_mode & MAY_WRITE))
return -EINVAL;
} else if (flags & O_PATH) {
@@ -924,8 +921,6 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
*/
flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
acc_mode = 0;
- } else {
- acc_mode = MAY_OPEN | ACC_MODE(flags);
}
op->open_flag = flags;
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 15e4500cd..b61b883c8 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -443,7 +443,7 @@ static int __init init_openprom_fs(void)
sizeof(struct op_inode_info),
0,
(SLAB_RECLAIM_ACCOUNT |
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD | SLAB_ACCOUNT),
op_inode_init_once);
if (!op_inode_cachep)
return -ENOMEM;
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index eff6319d5..d894e7cd9 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -248,9 +248,9 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
if (err)
goto out_cleanup;
- mutex_lock(&newdentry->d_inode->i_mutex);
+ inode_lock(newdentry->d_inode);
err = ovl_set_attr(newdentry, stat);
- mutex_unlock(&newdentry->d_inode->i_mutex);
+ inode_unlock(newdentry->d_inode);
if (err)
goto out_cleanup;
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 692ceda3b..52f6de5d4 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -167,7 +167,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
struct dentry *newdentry;
int err;
- mutex_lock_nested(&udir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(udir, I_MUTEX_PARENT);
newdentry = lookup_one_len(dentry->d_name.name, upperdir,
dentry->d_name.len);
err = PTR_ERR(newdentry);
@@ -185,7 +185,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
out_dput:
dput(newdentry);
out_unlock:
- mutex_unlock(&udir->i_mutex);
+ inode_unlock(udir);
return err;
}
@@ -258,9 +258,9 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
if (err)
goto out_cleanup;
- mutex_lock(&opaquedir->d_inode->i_mutex);
+ inode_lock(opaquedir->d_inode);
err = ovl_set_attr(opaquedir, &stat);
- mutex_unlock(&opaquedir->d_inode->i_mutex);
+ inode_unlock(opaquedir->d_inode);
if (err)
goto out_cleanup;
@@ -599,7 +599,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
struct dentry *upper = ovl_dentry_upper(dentry);
int err;
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
err = -ESTALE;
if (upper->d_parent == upperdir) {
/* Don't let d_delete() think it can reset d_inode */
@@ -618,8 +618,9 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
* sole user of this dentry. Too tricky... Just unhash for
* now.
*/
- d_drop(dentry);
- mutex_unlock(&dir->i_mutex);
+ if (!err)
+ d_drop(dentry);
+ inode_unlock(dir);
return err;
}
@@ -903,6 +904,13 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old,
if (!overwrite && new_is_dir && !old_opaque && new_opaque)
ovl_remove_opaque(newdentry);
+ /*
+ * Old dentry now lives in different location. Dentries in
+ * lowerstack are stale. We cannot drop them here because
+ * access to them is lockless. This could be only pure upper
+ * or opaque directory - numlower is zero. Or upper non-dir
+ * entry - its pureness is tracked by flag opaque.
+ */
if (old_opaque != new_opaque) {
ovl_dentry_set_opaque(old, new_opaque);
if (!overwrite)
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index b29036aa8..a4ff5d0d7 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -63,9 +63,11 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
if (!err) {
upperdentry = ovl_dentry_upper(dentry);
- mutex_lock(&upperdentry->d_inode->i_mutex);
+ inode_lock(upperdentry->d_inode);
err = notify_change(upperdentry, attr, NULL);
- mutex_unlock(&upperdentry->d_inode->i_mutex);
+ if (!err)
+ ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
+ inode_unlock(upperdentry->d_inode);
}
ovl_drop_write(dentry);
out:
@@ -108,6 +110,29 @@ int ovl_permission(struct inode *inode, int mask)
realdentry = ovl_entry_real(oe, &is_upper);
+ if (ovl_is_default_permissions(inode)) {
+ struct kstat stat;
+ struct path realpath = { .dentry = realdentry };
+
+ if (mask & MAY_NOT_BLOCK)
+ return -ECHILD;
+
+ realpath.mnt = ovl_entry_mnt_real(oe, inode, is_upper);
+
+ err = vfs_getattr(&realpath, &stat);
+ if (err)
+ return err;
+
+ if ((stat.mode ^ inode->i_mode) & S_IFMT)
+ return -ESTALE;
+
+ inode->i_mode = stat.mode;
+ inode->i_uid = stat.uid;
+ inode->i_gid = stat.gid;
+
+ return generic_permission(inode, mask);
+ }
+
/* Careful in RCU walk mode */
realinode = ACCESS_ONCE(realdentry->d_inode);
if (!realinode) {
@@ -144,57 +169,23 @@ out_dput:
return err;
}
-
-struct ovl_link_data {
- struct dentry *realdentry;
- void *cookie;
-};
-
-static const char *ovl_follow_link(struct dentry *dentry, void **cookie)
+static const char *ovl_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
struct dentry *realdentry;
struct inode *realinode;
- struct ovl_link_data *data = NULL;
- const char *ret;
+
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
realdentry = ovl_dentry_real(dentry);
realinode = realdentry->d_inode;
- if (WARN_ON(!realinode->i_op->follow_link))
+ if (WARN_ON(!realinode->i_op->get_link))
return ERR_PTR(-EPERM);
- if (realinode->i_op->put_link) {
- data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
- if (!data)
- return ERR_PTR(-ENOMEM);
- data->realdentry = realdentry;
- }
-
- ret = realinode->i_op->follow_link(realdentry, cookie);
- if (IS_ERR_OR_NULL(ret)) {
- kfree(data);
- return ret;
- }
-
- if (data)
- data->cookie = *cookie;
-
- *cookie = data;
-
- return ret;
-}
-
-static void ovl_put_link(struct inode *unused, void *c)
-{
- struct inode *realinode;
- struct ovl_link_data *data = c;
-
- if (!data)
- return;
-
- realinode = data->realdentry->d_inode;
- realinode->i_op->put_link(realinode, data->cookie);
- kfree(data);
+ return realinode->i_op->get_link(realdentry, realinode, done);
}
static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
@@ -391,8 +382,7 @@ static const struct inode_operations ovl_file_inode_operations = {
static const struct inode_operations ovl_symlink_inode_operations = {
.setattr = ovl_setattr,
- .follow_link = ovl_follow_link,
- .put_link = ovl_put_link,
+ .get_link = ovl_get_link,
.readlink = ovl_readlink,
.getattr = ovl_getattr,
.setxattr = ovl_setxattr,
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index e17154aea..99b4168c3 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -142,7 +142,10 @@ struct dentry *ovl_dentry_upper(struct dentry *dentry);
struct dentry *ovl_dentry_lower(struct dentry *dentry);
struct dentry *ovl_dentry_real(struct dentry *dentry);
struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper);
+struct vfsmount *ovl_entry_mnt_real(struct ovl_entry *oe, struct inode *inode,
+ bool is_upper);
struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
+bool ovl_is_default_permissions(struct inode *inode);
void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
struct dentry *ovl_workdir(struct dentry *dentry);
int ovl_want_write(struct dentry *dentry);
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index adcb1398c..fdaf28f75 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -228,7 +228,7 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
dput(dentry);
}
}
- mutex_unlock(&dir->d_inode->i_mutex);
+ inode_unlock(dir->d_inode);
}
revert_creds(old_cred);
put_cred(override_cred);
@@ -399,7 +399,7 @@ static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
loff_t res;
struct ovl_dir_file *od = file->private_data;
- mutex_lock(&file_inode(file)->i_mutex);
+ inode_lock(file_inode(file));
if (!file->f_pos)
ovl_dir_reset(file);
@@ -429,7 +429,7 @@ static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
res = offset;
}
out_unlock:
- mutex_unlock(&file_inode(file)->i_mutex);
+ inode_unlock(file_inode(file));
return res;
}
@@ -454,10 +454,10 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
ovl_path_upper(dentry, &upperpath);
realfile = ovl_path_open(&upperpath, O_RDONLY);
smp_mb__before_spinlock();
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (!od->upperfile) {
if (IS_ERR(realfile)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return PTR_ERR(realfile);
}
od->upperfile = realfile;
@@ -467,7 +467,7 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
fput(realfile);
realfile = od->upperfile;
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
}
@@ -479,9 +479,9 @@ static int ovl_dir_release(struct inode *inode, struct file *file)
struct ovl_dir_file *od = file->private_data;
if (od->cache) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ovl_cache_put(od, file->f_path.dentry);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
fput(od->realfile);
if (od->upperfile)
@@ -557,7 +557,7 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
{
struct ovl_cache_entry *p;
- mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
list_for_each_entry(p, list, l_node) {
struct dentry *dentry;
@@ -575,5 +575,5 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
ovl_cleanup(upper->d_inode, dentry);
dput(dentry);
}
- mutex_unlock(&upper->d_inode->i_mutex);
+ inode_unlock(upper->d_inode);
}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index f42c9407f..619ad4b01 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -16,6 +16,7 @@
#include <linux/slab.h>
#include <linux/parser.h>
#include <linux/module.h>
+#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/statfs.h>
#include <linux/seq_file.h>
@@ -25,12 +26,11 @@ MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Overlay filesystem");
MODULE_LICENSE("GPL");
-#define OVERLAYFS_SUPER_MAGIC 0x794c7630
-
struct ovl_config {
char *lowerdir;
char *upperdir;
char *workdir;
+ bool default_permissions;
};
/* private information held for overlayfs's superblock */
@@ -76,12 +76,14 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
if (oe->__upperdentry) {
type = __OVL_PATH_UPPER;
- if (oe->numlower) {
- if (S_ISDIR(dentry->d_inode->i_mode))
- type |= __OVL_PATH_MERGE;
- } else if (!oe->opaque) {
+ /*
+ * Non-dir dentry can hold lower dentry from previous
+ * location. Its purity depends only on opaque flag.
+ */
+ if (oe->numlower && S_ISDIR(dentry->d_inode->i_mode))
+ type |= __OVL_PATH_MERGE;
+ else if (!oe->opaque)
type |= __OVL_PATH_PURE;
- }
} else {
if (oe->numlower > 1)
type |= __OVL_PATH_MERGE;
@@ -155,6 +157,18 @@ struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper)
return realdentry;
}
+struct vfsmount *ovl_entry_mnt_real(struct ovl_entry *oe, struct inode *inode,
+ bool is_upper)
+{
+ if (is_upper) {
+ struct ovl_fs *ofs = inode->i_sb->s_fs_info;
+
+ return ofs->upper_mnt;
+ } else {
+ return oe->numlower ? oe->lowerstack[0].mnt : NULL;
+ }
+}
+
struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
@@ -162,6 +176,13 @@ struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry)
return oe->cache;
}
+bool ovl_is_default_permissions(struct inode *inode)
+{
+ struct ovl_fs *ofs = inode->i_sb->s_fs_info;
+
+ return ofs->config.default_permissions;
+}
+
void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache)
{
struct ovl_entry *oe = dentry->d_fsdata;
@@ -210,7 +231,7 @@ void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
- WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex));
+ WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode));
WARN_ON(oe->__upperdentry);
BUG_ON(!upperdentry->d_inode);
/*
@@ -225,7 +246,7 @@ void ovl_dentry_version_inc(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
- WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+ WARN_ON(!inode_is_locked(dentry->d_inode));
oe->version++;
}
@@ -233,7 +254,7 @@ u64 ovl_dentry_version_get(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
- WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+ WARN_ON(!inode_is_locked(dentry->d_inode));
return oe->version;
}
@@ -322,6 +343,7 @@ static const struct dentry_operations ovl_dentry_operations = {
static const struct dentry_operations ovl_reval_dentry_operations = {
.d_release = ovl_dentry_release,
+ .d_select_inode = ovl_d_select_inode,
.d_revalidate = ovl_dentry_revalidate,
.d_weak_revalidate = ovl_dentry_weak_revalidate,
};
@@ -356,9 +378,9 @@ static inline struct dentry *ovl_lookup_real(struct dentry *dir,
{
struct dentry *dentry;
- mutex_lock(&dir->d_inode->i_mutex);
+ inode_lock(dir->d_inode);
dentry = lookup_one_len(name->name, dir, name->len);
- mutex_unlock(&dir->d_inode->i_mutex);
+ inode_unlock(dir->d_inode);
if (IS_ERR(dentry)) {
if (PTR_ERR(dentry) == -ENOENT)
@@ -595,6 +617,8 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
seq_show_option(m, "upperdir", ufs->config.upperdir);
seq_show_option(m, "workdir", ufs->config.workdir);
}
+ if (ufs->config.default_permissions)
+ seq_puts(m, ",default_permissions");
return 0;
}
@@ -619,6 +643,7 @@ enum {
OPT_LOWERDIR,
OPT_UPPERDIR,
OPT_WORKDIR,
+ OPT_DEFAULT_PERMISSIONS,
OPT_ERR,
};
@@ -626,6 +651,7 @@ static const match_table_t ovl_tokens = {
{OPT_LOWERDIR, "lowerdir=%s"},
{OPT_UPPERDIR, "upperdir=%s"},
{OPT_WORKDIR, "workdir=%s"},
+ {OPT_DEFAULT_PERMISSIONS, "default_permissions"},
{OPT_ERR, NULL}
};
@@ -686,6 +712,10 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
return -ENOMEM;
break;
+ case OPT_DEFAULT_PERMISSIONS:
+ config->default_permissions = true;
+ break;
+
default:
pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
return -EINVAL;
@@ -717,7 +747,7 @@ static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
if (err)
return ERR_PTR(err);
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
retry:
work = lookup_one_len(OVL_WORKDIR_NAME, dentry,
strlen(OVL_WORKDIR_NAME));
@@ -743,7 +773,7 @@ retry:
goto out_dput;
}
out_unlock:
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
mnt_drop_write(mnt);
return work;
diff --git a/fs/pipe.c b/fs/pipe.c
index 42cf8ddf0..ab8dad3cc 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -38,6 +38,12 @@ unsigned int pipe_max_size = 1048576;
*/
unsigned int pipe_min_size = PAGE_SIZE;
+/* Maximum allocatable pages per user. Hard limit is unset by default, soft
+ * matches default values.
+ */
+unsigned long pipe_user_pages_hard;
+unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
+
/*
* We use a start+len construction, which provides full use of the
* allocated memory.
@@ -583,20 +589,49 @@ pipe_fasync(int fd, struct file *filp, int on)
return retval;
}
+static void account_pipe_buffers(struct pipe_inode_info *pipe,
+ unsigned long old, unsigned long new)
+{
+ atomic_long_add(new - old, &pipe->user->pipe_bufs);
+}
+
+static bool too_many_pipe_buffers_soft(struct user_struct *user)
+{
+ return pipe_user_pages_soft &&
+ atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_soft;
+}
+
+static bool too_many_pipe_buffers_hard(struct user_struct *user)
+{
+ return pipe_user_pages_hard &&
+ atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_hard;
+}
+
struct pipe_inode_info *alloc_pipe_info(void)
{
struct pipe_inode_info *pipe;
pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
if (pipe) {
- pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL);
+ unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
+ struct user_struct *user = get_current_user();
+
+ if (!too_many_pipe_buffers_hard(user)) {
+ if (too_many_pipe_buffers_soft(user))
+ pipe_bufs = 1;
+ pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * pipe_bufs, GFP_KERNEL);
+ }
+
if (pipe->bufs) {
init_waitqueue_head(&pipe->wait);
pipe->r_counter = pipe->w_counter = 1;
- pipe->buffers = PIPE_DEF_BUFFERS;
+ pipe->buffers = pipe_bufs;
+ pipe->user = user;
+ account_pipe_buffers(pipe, 0, pipe_bufs);
mutex_init(&pipe->mutex);
return pipe;
}
+ free_uid(user);
kfree(pipe);
}
@@ -607,6 +642,8 @@ void free_pipe_info(struct pipe_inode_info *pipe)
{
int i;
+ account_pipe_buffers(pipe, pipe->buffers, 0);
+ free_uid(pipe->user);
for (i = 0; i < pipe->buffers; i++) {
struct pipe_buffer *buf = pipe->bufs + i;
if (buf->ops)
@@ -998,6 +1035,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
}
+ account_pipe_buffers(pipe, pipe->buffers, nr_pages);
pipe->curbuf = 0;
kfree(pipe->bufs);
pipe->bufs = bufs;
@@ -1069,6 +1107,11 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
ret = -EPERM;
goto out;
+ } else if ((too_many_pipe_buffers_hard(pipe->user) ||
+ too_many_pipe_buffers_soft(pipe->user)) &&
+ !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
+ ret = -EPERM;
+ goto out;
}
ret = pipe_set_size(pipe, nr_pages);
break;
diff --git a/fs/pnode.c b/fs/pnode.c
index 6367e1e43..c524fdddc 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -202,6 +202,11 @@ static struct mount *last_dest, *last_source, *dest_master;
static struct mountpoint *mp;
static struct hlist_head *list;
+static inline bool peers(struct mount *m1, struct mount *m2)
+{
+ return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
+}
+
static int propagate_one(struct mount *m)
{
struct mount *child;
@@ -212,7 +217,7 @@ static int propagate_one(struct mount *m)
/* skip if mountpoint isn't covered by it */
if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
return 0;
- if (m->mnt_group_id == last_dest->mnt_group_id) {
+ if (peers(m, last_dest)) {
type = CL_MAKE_SHARED;
} else {
struct mount *n, *p;
@@ -223,7 +228,7 @@ static int propagate_one(struct mount *m)
last_source = last_source->mnt_master;
last_dest = last_source->mnt_parent;
}
- if (n->mnt_group_id != last_dest->mnt_group_id) {
+ if (!peers(n, last_dest)) {
last_source = last_source->mnt_master;
last_dest = last_source->mnt_parent;
}
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 4adde1e2c..711dd5170 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -769,8 +769,6 @@ posix_acl_xattr_get(const struct xattr_handler *handler,
struct posix_acl *acl;
int error;
- if (strcmp(name, "") != 0)
- return -EINVAL;
if (!IS_POSIXACL(d_backing_inode(dentry)))
return -EOPNOTSUPP;
if (d_is_symlink(dentry))
@@ -797,8 +795,6 @@ posix_acl_xattr_set(const struct xattr_handler *handler,
struct posix_acl *acl = NULL;
int ret;
- if (strcmp(name, "") != 0)
- return -EINVAL;
if (!IS_POSIXACL(inode))
return -EOPNOTSUPP;
if (!inode->i_op->set_acl)
@@ -827,25 +823,14 @@ out:
return ret;
}
-static size_t
-posix_acl_xattr_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
+static bool
+posix_acl_xattr_list(struct dentry *dentry)
{
- const char *xname = handler->prefix;
- size_t size;
-
- if (!IS_POSIXACL(d_backing_inode(dentry)))
- return 0;
-
- size = strlen(xname) + 1;
- if (list && size <= list_size)
- memcpy(list, xname, size);
- return size;
+ return IS_POSIXACL(d_backing_inode(dentry));
}
const struct xattr_handler posix_acl_access_xattr_handler = {
- .prefix = POSIX_ACL_XATTR_ACCESS,
+ .name = XATTR_NAME_POSIX_ACL_ACCESS,
.flags = ACL_TYPE_ACCESS,
.list = posix_acl_xattr_list,
.get = posix_acl_xattr_get,
@@ -854,7 +839,7 @@ const struct xattr_handler posix_acl_access_xattr_handler = {
EXPORT_SYMBOL_GPL(posix_acl_access_xattr_handler);
const struct xattr_handler posix_acl_default_xattr_handler = {
- .prefix = POSIX_ACL_XATTR_DEFAULT,
+ .name = XATTR_NAME_POSIX_ACL_DEFAULT,
.flags = ACL_TYPE_DEFAULT,
.list = posix_acl_xattr_list,
.get = posix_acl_xattr_get,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8be0e4cd2..4f764c2ac 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -953,6 +953,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
unsigned long src = *ppos;
int ret = 0;
struct mm_struct *mm = file->private_data;
+ unsigned long env_start, env_end;
if (!mm)
return 0;
@@ -964,19 +965,25 @@ static ssize_t environ_read(struct file *file, char __user *buf,
ret = 0;
if (!atomic_inc_not_zero(&mm->mm_users))
goto free;
+
+ down_read(&mm->mmap_sem);
+ env_start = mm->env_start;
+ env_end = mm->env_end;
+ up_read(&mm->mmap_sem);
+
while (count > 0) {
size_t this_len, max_len;
int retval;
- if (src >= (mm->env_end - mm->env_start))
+ if (src >= (env_end - env_start))
break;
- this_len = mm->env_end - (mm->env_start + src);
+ this_len = env_end - (env_start + src);
max_len = min_t(size_t, PAGE_SIZE, count);
this_len = min(max_len, this_len);
- retval = access_remote_vm(mm, (mm->env_start + src),
+ retval = access_remote_vm(mm, (env_start + src),
page, this_len, 0);
if (retval <= 0) {
@@ -1565,12 +1572,16 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
return -ENOENT;
}
-static const char *proc_pid_follow_link(struct dentry *dentry, void **cookie)
+static const char *proc_pid_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct inode *inode = d_inode(dentry);
struct path path;
int error = -EACCES;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
/* Are we allowed to snoop on the tasks file descriptors? */
if (!proc_fd_access_allowed(inode))
goto out;
@@ -1631,7 +1642,7 @@ out:
const struct inode_operations proc_pid_link_inode_operations = {
.readlink = proc_pid_readlink,
- .follow_link = proc_pid_follow_link,
+ .get_link = proc_pid_get_link,
.setattr = proc_setattr,
};
@@ -1896,7 +1907,7 @@ static const struct dentry_operations tid_map_files_dentry_operations = {
.d_delete = pid_delete_dentry,
};
-static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
+static int map_files_get_link(struct dentry *dentry, struct path *path)
{
unsigned long vm_start, vm_end;
struct vm_area_struct *vma;
@@ -1922,7 +1933,7 @@ static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
down_read(&mm->mmap_sem);
vma = find_exact_vma(mm, vm_start, vm_end);
if (vma && vma->vm_file) {
- *path = vma_pr_or_file(vma)->f_path;
+ *path = vma->vm_file->f_path;
path_get(path);
rc = 0;
}
@@ -1946,20 +1957,22 @@ struct map_files_info {
* path to the file in question.
*/
static const char *
-proc_map_files_follow_link(struct dentry *dentry, void **cookie)
+proc_map_files_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
if (!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
- return proc_pid_follow_link(dentry, NULL);
+ return proc_pid_get_link(dentry, inode, done);
}
/*
- * Identical to proc_pid_link_inode_operations except for follow_link()
+ * Identical to proc_pid_link_inode_operations except for get_link()
*/
static const struct inode_operations proc_map_files_link_inode_operations = {
.readlink = proc_pid_readlink,
- .follow_link = proc_map_files_follow_link,
+ .get_link = proc_map_files_get_link,
.setattr = proc_setattr,
};
@@ -1976,7 +1989,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
return -ENOENT;
ei = PROC_I(inode);
- ei->op.proc_get_link = proc_map_files_get_link;
+ ei->op.proc_get_link = map_files_get_link;
inode->i_op = &proc_map_files_link_inode_operations;
inode->i_size = 64;
@@ -2360,7 +2373,7 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
size_t count, loff_t *ppos)
{
struct inode * inode = file_inode(file);
- char *page;
+ void *page;
ssize_t length;
struct task_struct *task = get_proc_task(inode);
@@ -2375,14 +2388,11 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
if (*ppos != 0)
goto out;
- length = -ENOMEM;
- page = (char*)__get_free_page(GFP_TEMPORARY);
- if (!page)
+ page = memdup_user(buf, count);
+ if (IS_ERR(page)) {
+ length = PTR_ERR(page);
goto out;
-
- length = -EFAULT;
- if (copy_from_user(page, buf, count))
- goto out_free;
+ }
/* Guard against adverse ptrace interaction */
length = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
@@ -2391,10 +2401,10 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
length = security_setprocattr(task,
(char*)file->f_path.dentry->d_name.name,
- (void*)page, count);
+ page, count);
mutex_unlock(&task->signal->cred_guard_mutex);
out_free:
- free_page((unsigned long) page);
+ kfree(page);
out:
put_task_struct(task);
out_no_task:
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 3c2a915c6..56afa5ef0 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -258,6 +258,7 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
name, len, instantiate, p,
(void *)(unsigned long)fd))
goto out_fd_loop;
+ cond_resched();
rcu_read_lock();
}
rcu_read_unlock();
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index bd95b9fde..42305ddcb 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -95,7 +95,8 @@ void __init proc_init_inodecache(void)
proc_inode_cachep = kmem_cache_create("proc_inode_cache",
sizeof(struct proc_inode),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_PANIC),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT|
+ SLAB_PANIC),
init_once);
}
@@ -393,24 +394,25 @@ static const struct file_operations proc_reg_file_ops_no_compat = {
};
#endif
-static const char *proc_follow_link(struct dentry *dentry, void **cookie)
+static void proc_put_link(void *p)
{
- struct proc_dir_entry *pde = PDE(d_inode(dentry));
- if (unlikely(!use_pde(pde)))
- return ERR_PTR(-EINVAL);
- *cookie = pde;
- return pde->data;
+ unuse_pde(p);
}
-static void proc_put_link(struct inode *unused, void *p)
+static const char *proc_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- unuse_pde(p);
+ struct proc_dir_entry *pde = PDE(inode);
+ if (unlikely(!use_pde(pde)))
+ return ERR_PTR(-EINVAL);
+ set_delayed_call(done, proc_put_link, pde);
+ return pde->data;
}
const struct inode_operations proc_link_inode_operations = {
.readlink = generic_readlink,
- .follow_link = proc_follow_link,
- .put_link = proc_put_link,
+ .get_link = proc_get_link,
};
struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 92e6726f6..a939f5ed7 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -552,9 +552,9 @@ static int open_kcore(struct inode *inode, struct file *filp)
if (kcore_need_update)
kcore_update_ram();
if (i_size_read(inode) != proc_root_kcore->size) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
i_size_write(inode, proc_root_kcore->size);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return 0;
}
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index ae836fbb0..70b9f953b 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -57,11 +57,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
/*
* Estimate the amount of memory available for userspace allocations,
* without causing swapping.
- *
- * Free memory cannot be taken below the low watermark, before the
- * system starts swapping.
*/
- available = i.freeram - wmark_low;
+ available = i.freeram - totalreserve_pages;
/*
* Not all the page cache can be freed, otherwise the system will
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 1b0ea4a5d..276f12431 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -30,14 +30,18 @@ static const struct proc_ns_operations *ns_entries[] = {
&mntns_operations,
};
-static const char *proc_ns_follow_link(struct dentry *dentry, void **cookie)
+static const char *proc_ns_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct inode *inode = d_inode(dentry);
const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
struct task_struct *task;
struct path ns_path;
void *error = ERR_PTR(-EACCES);
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
task = get_proc_task(inode);
if (!task)
return error;
@@ -74,7 +78,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
static const struct inode_operations proc_ns_link_inode_operations = {
.readlink = proc_ns_readlink,
- .follow_link = proc_ns_follow_link,
+ .get_link = proc_ns_get_link,
.setattr = proc_setattr,
};
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index cb8eda060..f8595e8b5 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -45,10 +45,7 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region)
file = region->vm_file;
if (file) {
- struct inode *inode;
-
- file = vmr_pr_or_file(region);
- inode = file_inode(file);
+ struct inode *inode = file_inode(region->vm_file);
dev = inode->i_sb->s_dev;
ino = inode->i_ino;
}
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 93484034a..b2855eea5 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -103,9 +103,9 @@ u64 stable_page_flags(struct page *page)
* pseudo flags for the well known (anonymous) memory mapped pages
*
* Note that page->_mapcount is overloaded in SLOB/SLUB/SLQB, so the
- * simple test in page_mapped() is not enough.
+ * simple test in page_mapcount() is not enough.
*/
- if (!PageSlab(page) && page_mapped(page))
+ if (!PageSlab(page) && page_mapcount(page))
u |= 1 << KPF_MMAP;
if (PageAnon(page))
u |= 1 << KPF_ANON;
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 113b8d061..b6a8d3529 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -18,26 +18,28 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
return readlink_copy(buffer, buflen, tmp);
}
-static const char *proc_self_follow_link(struct dentry *dentry, void **cookie)
+static const char *proc_self_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct pid_namespace *ns = dentry->d_sb->s_fs_info;
+ struct pid_namespace *ns = inode->i_sb->s_fs_info;
pid_t tgid = task_tgid_nr_ns(current, ns);
char *name;
if (!tgid)
return ERR_PTR(-ENOENT);
/* 11 for max length of signed int in decimal + NULL term */
- name = kmalloc(12, GFP_KERNEL);
- if (!name)
- return ERR_PTR(-ENOMEM);
+ name = kmalloc(12, dentry ? GFP_KERNEL : GFP_ATOMIC);
+ if (unlikely(!name))
+ return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD);
sprintf(name, "%d", tgid);
- return *cookie = name;
+ set_delayed_call(done, kfree_link, name);
+ return name;
}
static const struct inode_operations proc_self_inode_operations = {
.readlink = proc_self_readlink,
- .follow_link = proc_self_follow_link,
- .put_link = kfree_put_link,
+ .get_link = proc_self_get_link,
};
static unsigned self_inum;
@@ -48,7 +50,7 @@ int proc_setup_self(struct super_block *s)
struct pid_namespace *ns = s->s_fs_info;
struct dentry *self;
- mutex_lock(&root_inode->i_mutex);
+ inode_lock(root_inode);
self = d_alloc_name(s->s_root, "self");
if (self) {
struct inode *inode = new_inode_pseudo(s);
@@ -67,7 +69,7 @@ int proc_setup_self(struct super_block *s)
} else {
self = ERR_PTR(-ENOMEM);
}
- mutex_unlock(&root_inode->i_mutex);
+ inode_unlock(root_inode);
if (IS_ERR(self)) {
pr_err("proc_fill_super: can't allocate /proc/self\n");
return PTR_ERR(self);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index e4733feb0..fa95ab2d3 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -14,6 +14,7 @@
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
+#include <linux/shmem_fs.h>
#include <asm/elf.h>
#include <asm/uaccess.h>
@@ -22,9 +23,13 @@
void task_mem(struct seq_file *m, struct mm_struct *mm)
{
- unsigned long data, text, lib, swap, ptes, pmds;
+ unsigned long text, lib, swap, ptes, pmds, anon, file, shmem;
unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
+ anon = get_mm_counter(mm, MM_ANONPAGES);
+ file = get_mm_counter(mm, MM_FILEPAGES);
+ shmem = get_mm_counter(mm, MM_SHMEMPAGES);
+
/*
* Note: to minimize their overhead, mm maintains hiwater_vm and
* hiwater_rss only when about to *lower* total_vm or rss. Any
@@ -35,11 +40,10 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
hiwater_vm = total_vm = mm->total_vm;
if (hiwater_vm < mm->hiwater_vm)
hiwater_vm = mm->hiwater_vm;
- hiwater_rss = total_rss = get_mm_rss(mm);
+ hiwater_rss = total_rss = anon + file + shmem;
if (hiwater_rss < mm->hiwater_rss)
hiwater_rss = mm->hiwater_rss;
- data = mm->total_vm - mm->shared_vm - mm->stack_vm;
text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
swap = get_mm_counter(mm, MM_SWAPENTS);
@@ -52,6 +56,9 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
"VmPin:\t%8lu kB\n"
"VmHWM:\t%8lu kB\n"
"VmRSS:\t%8lu kB\n"
+ "RssAnon:\t%8lu kB\n"
+ "RssFile:\t%8lu kB\n"
+ "RssShmem:\t%8lu kB\n"
"VmData:\t%8lu kB\n"
"VmStk:\t%8lu kB\n"
"VmExe:\t%8lu kB\n"
@@ -65,7 +72,10 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
mm->pinned_vm << (PAGE_SHIFT-10),
hiwater_rss << (PAGE_SHIFT-10),
total_rss << (PAGE_SHIFT-10),
- data << (PAGE_SHIFT-10),
+ anon << (PAGE_SHIFT-10),
+ file << (PAGE_SHIFT-10),
+ shmem << (PAGE_SHIFT-10),
+ mm->data_vm << (PAGE_SHIFT-10),
mm->stack_vm << (PAGE_SHIFT-10), text, lib,
ptes >> 10,
pmds >> 10,
@@ -82,10 +92,11 @@ unsigned long task_statm(struct mm_struct *mm,
unsigned long *shared, unsigned long *text,
unsigned long *data, unsigned long *resident)
{
- *shared = get_mm_counter(mm, MM_FILEPAGES);
+ *shared = get_mm_counter(mm, MM_FILEPAGES) +
+ get_mm_counter(mm, MM_SHMEMPAGES);
*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
>> PAGE_SHIFT;
- *data = mm->total_vm - mm->shared_vm;
+ *data = mm->data_vm + mm->stack_vm;
*resident = *shared + get_mm_counter(mm, MM_ANONPAGES);
return mm->total_vm;
}
@@ -248,23 +259,29 @@ static int do_maps_open(struct inode *inode, struct file *file,
sizeof(struct proc_maps_private));
}
-static pid_t pid_of_stack(struct proc_maps_private *priv,
- struct vm_area_struct *vma, bool is_pid)
+/*
+ * Indicate if the VMA is a stack for the given task; for
+ * /proc/PID/maps that is the stack of the main task.
+ */
+static int is_stack(struct proc_maps_private *priv,
+ struct vm_area_struct *vma, int is_pid)
{
- struct inode *inode = priv->inode;
- struct task_struct *task;
- pid_t ret = 0;
+ int stack = 0;
- rcu_read_lock();
- task = pid_task(proc_pid(inode), PIDTYPE_PID);
- if (task) {
- task = task_of_stack(task, vma, is_pid);
+ if (is_pid) {
+ stack = vma->vm_start <= vma->vm_mm->start_stack &&
+ vma->vm_end >= vma->vm_mm->start_stack;
+ } else {
+ struct inode *inode = priv->inode;
+ struct task_struct *task;
+
+ rcu_read_lock();
+ task = pid_task(proc_pid(inode), PIDTYPE_PID);
if (task)
- ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
+ stack = vma_is_stack_for_task(vma, task);
+ rcu_read_unlock();
}
- rcu_read_unlock();
-
- return ret;
+ return stack;
}
static void
@@ -281,10 +298,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
const char *name = NULL;
if (file) {
- struct inode *inode;
-
- file = vma_pr_or_file(vma);
- inode = file_inode(file);
+ struct inode *inode = file_inode(vma->vm_file);
dev = inode->i_sb->s_dev;
ino = inode->i_ino;
pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
@@ -327,8 +341,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
name = arch_vma_name(vma);
if (!name) {
- pid_t tid;
-
if (!mm) {
name = "[vdso]";
goto done;
@@ -340,21 +352,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
goto done;
}
- tid = pid_of_stack(priv, vma, is_pid);
- if (tid != 0) {
- /*
- * Thread stack in /proc/PID/task/TID/maps or
- * the main process stack.
- */
- if (!is_pid || (vma->vm_start <= mm->start_stack &&
- vma->vm_end >= mm->start_stack)) {
- name = "[stack]";
- } else {
- /* Thread stack in /proc/PID/maps */
- seq_pad(m, ' ');
- seq_printf(m, "[stack:%d]", tid);
- }
- }
+ if (is_stack(priv, vma, is_pid))
+ name = "[stack]";
}
done:
@@ -454,12 +453,14 @@ struct mem_size_stats {
unsigned long private_hugetlb;
u64 pss;
u64 swap_pss;
+ bool check_shmem_swap;
};
static void smaps_account(struct mem_size_stats *mss, struct page *page,
- unsigned long size, bool young, bool dirty)
+ bool compound, bool young, bool dirty)
{
- int mapcount;
+ int i, nr = compound ? 1 << compound_order(page) : 1;
+ unsigned long size = nr * PAGE_SIZE;
if (PageAnon(page))
mss->anonymous += size;
@@ -468,26 +469,53 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
/* Accumulate the size in pages that have been accessed. */
if (young || page_is_young(page) || PageReferenced(page))
mss->referenced += size;
- mapcount = page_mapcount(page);
- if (mapcount >= 2) {
- u64 pss_delta;
- if (dirty || PageDirty(page))
- mss->shared_dirty += size;
- else
- mss->shared_clean += size;
- pss_delta = (u64)size << PSS_SHIFT;
- do_div(pss_delta, mapcount);
- mss->pss += pss_delta;
- } else {
+ /*
+ * page_count(page) == 1 guarantees the page is mapped exactly once.
+ * If any subpage of the compound page were mapped with a PTE, it would
+ * elevate page_count().
+ */
+ if (page_count(page) == 1) {
if (dirty || PageDirty(page))
mss->private_dirty += size;
else
mss->private_clean += size;
mss->pss += (u64)size << PSS_SHIFT;
+ return;
+ }
+
+ for (i = 0; i < nr; i++, page++) {
+ int mapcount = page_mapcount(page);
+
+ if (mapcount >= 2) {
+ if (dirty || PageDirty(page))
+ mss->shared_dirty += PAGE_SIZE;
+ else
+ mss->shared_clean += PAGE_SIZE;
+ mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
+ } else {
+ if (dirty || PageDirty(page))
+ mss->private_dirty += PAGE_SIZE;
+ else
+ mss->private_clean += PAGE_SIZE;
+ mss->pss += PAGE_SIZE << PSS_SHIFT;
+ }
}
}
+#ifdef CONFIG_SHMEM
+static int smaps_pte_hole(unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct mem_size_stats *mss = walk->private;
+
+ mss->swap += shmem_partial_swap_usage(
+ walk->vma->vm_file->f_mapping, addr, end);
+
+ return 0;
+}
+#endif
+
static void smaps_pte_entry(pte_t *pte, unsigned long addr,
struct mm_walk *walk)
{
@@ -515,11 +543,25 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
}
} else if (is_migration_entry(swpent))
page = migration_entry_to_page(swpent);
+ } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap
+ && pte_none(*pte))) {
+ page = find_get_entry(vma->vm_file->f_mapping,
+ linear_page_index(vma, addr));
+ if (!page)
+ return;
+
+ if (radix_tree_exceptional_entry(page))
+ mss->swap += PAGE_SIZE;
+ else
+ page_cache_release(page);
+
+ return;
}
if (!page)
return;
- smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte));
+
+ smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte));
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -535,8 +577,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
if (IS_ERR_OR_NULL(page))
return;
mss->anonymous_thp += HPAGE_PMD_SIZE;
- smaps_account(mss, page, HPAGE_PMD_SIZE,
- pmd_young(*pmd), pmd_dirty(*pmd));
+ smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd));
}
#else
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
@@ -552,7 +593,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pte_t *pte;
spinlock_t *ptl;
- if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
smaps_pmd_entry(pmd, addr, walk);
spin_unlock(ptl);
return 0;
@@ -674,6 +716,31 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
};
memset(&mss, 0, sizeof mss);
+
+#ifdef CONFIG_SHMEM
+ if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
+ /*
+ * For shared or readonly shmem mappings we know that all
+ * swapped out pages belong to the shmem object, and we can
+ * obtain the swap value much more efficiently. For private
+ * writable mappings, we might have COW pages that are
+ * not affected by the parent swapped out pages of the shmem
+ * object, so we have to distinguish them during the page walk.
+ * Unless we know that the shmem object (or the part mapped by
+ * our VMA) has no swapped out pages at all.
+ */
+ unsigned long shmem_swapped = shmem_swap_usage(vma);
+
+ if (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
+ !(vma->vm_flags & VM_WRITE)) {
+ mss.swap = shmem_swapped;
+ } else {
+ mss.check_shmem_swap = true;
+ smaps_walk.pte_hole = smaps_pte_hole;
+ }
+ }
+#endif
+
/* mmap_sem is held in m_start */
walk_page_vma(vma, &smaps_walk);
@@ -820,9 +887,6 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
pmd = pmd_wrprotect(pmd);
pmd = pmd_clear_soft_dirty(pmd);
- if (vma->vm_flags & VM_SOFTDIRTY)
- vma->vm_flags &= ~VM_SOFTDIRTY;
-
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
}
#else
@@ -841,7 +905,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
spinlock_t *ptl;
struct page *page;
- if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
clear_soft_dirty_pmd(vma, addr, pmd);
goto out;
@@ -1115,7 +1180,8 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
int err = 0;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (pmd_trans_huge_lock(pmdp, vma, &ptl) == 1) {
+ ptl = pmd_trans_huge_lock(pmdp, vma);
+ if (ptl) {
u64 flags = 0, frame = 0;
pmd_t pmd = *pmdp;
@@ -1447,7 +1513,8 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
pte_t *orig_pte;
pte_t *pte;
- if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
pte_t huge_pte = *(pte_t *)pmd;
struct page *page;
@@ -1509,7 +1576,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
struct vm_area_struct *vma = v;
struct numa_maps *md = &numa_priv->md;
- struct file *file = vma_pr_or_file(vma);
+ struct file *file = vma->vm_file;
struct mm_struct *mm = vma->vm_mm;
struct mm_walk walk = {
.hugetlb_entry = gather_hugetlb_stats,
@@ -1542,19 +1609,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
seq_file_path(m, file, "\n\t= ");
} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
seq_puts(m, " heap");
- } else {
- pid_t tid = pid_of_stack(proc_priv, vma, is_pid);
- if (tid != 0) {
- /*
- * Thread stack in /proc/PID/task/TID/maps or
- * the main process stack.
- */
- if (!is_pid || (vma->vm_start <= mm->start_stack &&
- vma->vm_end >= mm->start_stack))
- seq_puts(m, " stack");
- else
- seq_printf(m, " stack:%d", tid);
- }
+ } else if (is_stack(proc_priv, vma, is_pid)) {
+ seq_puts(m, " stack");
}
if (is_vm_hugetlb_page(vma))
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 7aa92dbf9..faacb0c0d 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -123,23 +123,26 @@ unsigned long task_statm(struct mm_struct *mm,
return size;
}
-static pid_t pid_of_stack(struct proc_maps_private *priv,
- struct vm_area_struct *vma, bool is_pid)
+static int is_stack(struct proc_maps_private *priv,
+ struct vm_area_struct *vma, int is_pid)
{
- struct inode *inode = priv->inode;
- struct task_struct *task;
- pid_t ret = 0;
-
- rcu_read_lock();
- task = pid_task(proc_pid(inode), PIDTYPE_PID);
- if (task) {
- task = task_of_stack(task, vma, is_pid);
+ struct mm_struct *mm = vma->vm_mm;
+ int stack = 0;
+
+ if (is_pid) {
+ stack = vma->vm_start <= mm->start_stack &&
+ vma->vm_end >= mm->start_stack;
+ } else {
+ struct inode *inode = priv->inode;
+ struct task_struct *task;
+
+ rcu_read_lock();
+ task = pid_task(proc_pid(inode), PIDTYPE_PID);
if (task)
- ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
+ stack = vma_is_stack_for_task(vma, task);
+ rcu_read_unlock();
}
- rcu_read_unlock();
-
- return ret;
+ return stack;
}
/*
@@ -160,10 +163,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
file = vma->vm_file;
if (file) {
- struct inode *inode;
-
- file = vma_pr_or_file(vma);
- inode = file_inode(file);
+ struct inode *inode = file_inode(vma->vm_file);
dev = inode->i_sb->s_dev;
ino = inode->i_ino;
pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
@@ -184,21 +184,9 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
if (file) {
seq_pad(m, ' ');
seq_file_path(m, file, "");
- } else if (mm) {
- pid_t tid = pid_of_stack(priv, vma, is_pid);
-
- if (tid != 0) {
- seq_pad(m, ' ');
- /*
- * Thread stack in /proc/PID/task/TID/maps or
- * the main process stack.
- */
- if (!is_pid || (vma->vm_start <= mm->start_stack &&
- vma->vm_end >= mm->start_stack))
- seq_printf(m, "[stack]");
- else
- seq_printf(m, "[stack:%d]", tid);
- }
+ } else if (mm && is_stack(priv, vma, is_pid)) {
+ seq_pad(m, ' ');
+ seq_printf(m, "[stack]");
}
seq_putc(m, '\n');
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index 947b0f4fd..e58a31e8f 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -19,26 +19,29 @@ static int proc_thread_self_readlink(struct dentry *dentry, char __user *buffer,
return readlink_copy(buffer, buflen, tmp);
}
-static const char *proc_thread_self_follow_link(struct dentry *dentry, void **cookie)
+static const char *proc_thread_self_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct pid_namespace *ns = dentry->d_sb->s_fs_info;
+ struct pid_namespace *ns = inode->i_sb->s_fs_info;
pid_t tgid = task_tgid_nr_ns(current, ns);
pid_t pid = task_pid_nr_ns(current, ns);
char *name;
if (!pid)
return ERR_PTR(-ENOENT);
- name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL);
- if (!name)
- return ERR_PTR(-ENOMEM);
+ name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF,
+ dentry ? GFP_KERNEL : GFP_ATOMIC);
+ if (unlikely(!name))
+ return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD);
sprintf(name, "%d/task/%d", tgid, pid);
- return *cookie = name;
+ set_delayed_call(done, kfree_link, name);
+ return name;
}
static const struct inode_operations proc_thread_self_inode_operations = {
.readlink = proc_thread_self_readlink,
- .follow_link = proc_thread_self_follow_link,
- .put_link = kfree_put_link,
+ .get_link = proc_thread_self_get_link,
};
static unsigned thread_self_inum;
@@ -49,7 +52,7 @@ int proc_setup_thread_self(struct super_block *s)
struct pid_namespace *ns = s->s_fs_info;
struct dentry *thread_self;
- mutex_lock(&root_inode->i_mutex);
+ inode_lock(root_inode);
thread_self = d_alloc_name(s->s_root, "thread-self");
if (thread_self) {
struct inode *inode = new_inode_pseudo(s);
@@ -68,7 +71,7 @@ int proc_setup_thread_self(struct super_block *s)
} else {
thread_self = ERR_PTR(-ENOMEM);
}
- mutex_unlock(&root_inode->i_mutex);
+ inode_unlock(root_inode);
if (IS_ERR(thread_self)) {
pr_err("proc_fill_super: can't allocate /proc/thread_self\n");
return PTR_ERR(thread_self);
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 8ebd9a334..2256e7e23 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -95,9 +95,9 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
{
struct proc_mounts *p = m->private;
struct mount *r = real_mount(mnt);
- int err = 0;
struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
struct super_block *sb = mnt_path.dentry->d_sb;
+ int err;
if (sb->s_op->show_devname) {
err = sb->s_op->show_devname(m, mnt_path.dentry);
@@ -131,16 +131,17 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
struct mount *r = real_mount(mnt);
struct super_block *sb = mnt->mnt_sb;
struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
- int err = 0;
+ int err;
seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id,
MAJOR(sb->s_dev), MINOR(sb->s_dev));
- if (sb->s_op->show_path)
+ if (sb->s_op->show_path) {
err = sb->s_op->show_path(m, mnt->mnt_root);
- else
+ if (err)
+ goto out;
+ } else {
seq_dentry(m, mnt->mnt_root, " \t\n\\");
- if (err)
- goto out;
+ }
seq_putc(m, ' ');
/* mountpoints outside of chroot jail will give SEQ_SKIP on this */
@@ -168,12 +169,13 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
seq_puts(m, " - ");
show_type(m, sb);
seq_putc(m, ' ');
- if (sb->s_op->show_devname)
+ if (sb->s_op->show_devname) {
err = sb->s_op->show_devname(m, mnt->mnt_root);
- else
+ if (err)
+ goto out;
+ } else {
mangle(m, r->mnt_devname ? r->mnt_devname : "none");
- if (err)
- goto out;
+ }
seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");
err = show_sb_opts(m, sb);
if (err)
@@ -191,7 +193,7 @@ static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
struct mount *r = real_mount(mnt);
struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
struct super_block *sb = mnt_path.dentry->d_sb;
- int err = 0;
+ int err;
/* device */
if (sb->s_op->show_devname) {
@@ -220,8 +222,7 @@ static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
/* optional statistics */
if (sb->s_op->show_stats) {
seq_putc(m, ' ');
- if (!err)
- err = sb->s_op->show_stats(m, mnt_path.dentry);
+ err = sb->s_op->show_stats(m, mnt_path.dentry);
}
seq_putc(m, '\n');
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index d8c439d81..dc645b66c 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -377,7 +377,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
break;
}
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
dentry = d_alloc_name(root, name);
if (!dentry)
@@ -397,12 +397,12 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
list_add(&private->list, &allpstore);
spin_unlock_irqrestore(&allpstore_lock, flags);
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
return 0;
fail_lockedalloc:
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
kfree(private);
fail_alloc:
iput(inode);
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index c4bcb7788..3a67cfb14 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -316,6 +316,7 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino)
inode->i_fop = &qnx4_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &qnx4_aops;
qnx4_i(inode)->mmu_private = inode->i_size;
} else {
@@ -364,7 +365,7 @@ static int init_inodecache(void)
qnx4_inode_cachep = kmem_cache_create("qnx4_inode_cache",
sizeof(struct qnx4_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
if (qnx4_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 32d2e1a97..47bb1de07 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -582,6 +582,7 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
inode->i_mapping->a_ops = &qnx6_aops;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &qnx6_aops;
} else
init_special_inode(inode, inode->i_mode, 0);
@@ -624,7 +625,7 @@ static int init_inodecache(void)
qnx6_inode_cachep = kmem_cache_create("qnx6_inode_cache",
sizeof(struct qnx6_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
if (!qnx6_inode_cachep)
return -ENOMEM;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index ef0d64b2a..3c3b81bb6 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -682,9 +682,9 @@ int dquot_quota_sync(struct super_block *sb, int type)
continue;
if (!sb_has_quota_active(sb, cnt))
continue;
- mutex_lock(&dqopt->files[cnt]->i_mutex);
+ inode_lock(dqopt->files[cnt]);
truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
- mutex_unlock(&dqopt->files[cnt]->i_mutex);
+ inode_unlock(dqopt->files[cnt]);
}
mutex_unlock(&dqopt->dqonoff_mutex);
@@ -2162,12 +2162,12 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags)
/* If quota was reenabled in the meantime, we have
* nothing to do */
if (!sb_has_quota_loaded(sb, cnt)) {
- mutex_lock(&toputinode[cnt]->i_mutex);
+ inode_lock(toputinode[cnt]);
toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
S_NOATIME | S_NOQUOTA);
truncate_inode_pages(&toputinode[cnt]->i_data,
0);
- mutex_unlock(&toputinode[cnt]->i_mutex);
+ inode_unlock(toputinode[cnt]);
mark_inode_dirty_sync(toputinode[cnt]);
}
mutex_unlock(&dqopt->dqonoff_mutex);
@@ -2258,11 +2258,11 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
/* We don't want quota and atime on quota files (deadlocks
* possible) Also nobody should write to the file - we use
* special IO operations which ignore the immutable bit. */
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE |
S_NOQUOTA);
inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/*
* When S_NOQUOTA is set, remove dquot references as no more
* references can be added
@@ -2305,12 +2305,12 @@ out_file_init:
iput(inode);
out_lock:
if (oldflags != -1) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* Set the flags back (in the case of accidental quotaon()
* on a wrong file we don't want to mess up the flags) */
inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE);
inode->i_flags |= oldflags;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
mutex_unlock(&dqopt->dqonoff_mutex);
out_fmt:
@@ -2430,9 +2430,9 @@ int dquot_quota_on_mount(struct super_block *sb, char *qf_name,
struct dentry *dentry;
int error;
- mutex_lock(&d_inode(sb->s_root)->i_mutex);
+ inode_lock(d_inode(sb->s_root));
dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
- mutex_unlock(&d_inode(sb->s_root)->i_mutex);
+ inode_unlock(d_inode(sb->s_root));
if (IS_ERR(dentry))
return PTR_ERR(dentry);
@@ -2924,4 +2924,4 @@ static int __init dquot_init(void)
return 0;
}
-module_init(dquot_init);
+fs_initcall(dquot_init);
diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c
index bb2869f5d..d07a2f91d 100644
--- a/fs/quota/netlink.c
+++ b/fs/quota/netlink.c
@@ -1,7 +1,5 @@
-
#include <linux/cred.h>
#include <linux/init.h>
-#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/quotaops.h>
#include <linux/sched.h>
@@ -105,5 +103,4 @@ static int __init quota_init(void)
"VFS: Failed to create quota netlink interface.\n");
return 0;
};
-
-module_init(quota_init);
+fs_initcall(quota_init);
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index 2aa012a68..ed85d4f35 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -30,13 +30,13 @@ static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot);
static void v2r1_disk2memdqb(struct dquot *dquot, void *dp);
static int v2r1_is_id(void *dp, struct dquot *dquot);
-static struct qtree_fmt_operations v2r0_qtree_ops = {
+static const struct qtree_fmt_operations v2r0_qtree_ops = {
.mem2disk_dqblk = v2r0_mem2diskdqb,
.disk2mem_dqblk = v2r0_disk2memdqb,
.is_id = v2r0_is_id,
};
-static struct qtree_fmt_operations v2r1_qtree_ops = {
+static const struct qtree_fmt_operations v2r1_qtree_ops = {
.mem2disk_dqblk = v2r1_mem2diskdqb,
.disk2mem_dqblk = v2r1_disk2memdqb,
.is_id = v2r1_is_id,
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 889d558b4..38981b037 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -79,6 +79,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
break;
case S_IFLNK:
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
break;
}
}
diff --git a/fs/read_write.c b/fs/read_write.c
index 0a2893354..dadf24e5c 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -16,6 +16,8 @@
#include <linux/pagemap.h>
#include <linux/splice.h>
#include <linux/compat.h>
+#include <linux/mount.h>
+#include <linux/fs.h>
#include "internal.h"
#include <asm/uaccess.h>
@@ -171,6 +173,45 @@ loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t si
EXPORT_SYMBOL(fixed_size_llseek);
/**
+ * no_seek_end_llseek - llseek that accepts only SEEK_SET and SEEK_CUR
+ * @file: file structure to seek on
+ * @offset: file offset to seek to
+ * @whence: type of seek
+ *
+ */
+loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence)
+{
+ switch (whence) {
+ case SEEK_SET: case SEEK_CUR:
+ return generic_file_llseek_size(file, offset, whence,
+ OFFSET_MAX, 0);
+ default:
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(no_seek_end_llseek);
+
+/**
+ * no_seek_end_llseek_size - fixed-size llseek, SEEK_SET/SEEK_CUR only
+ * @file: file structure to seek on
+ * @offset: file offset to seek to
+ * @whence: type of seek
+ * @size: maximal offset allowed
+ *
+ */
+loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size)
+{
+ switch (whence) {
+ case SEEK_SET: case SEEK_CUR:
+ return generic_file_llseek_size(file, offset, whence,
+ size, 0);
+ default:
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(no_seek_end_llseek_size);
+
+/**
* noop_llseek - No Operation Performed llseek implementation
* @file: file structure to seek on
* @offset: file offset to seek to
@@ -198,7 +239,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int whence)
struct inode *inode = file_inode(file);
loff_t retval;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
switch (whence) {
case SEEK_END:
offset += i_size_read(inode);
@@ -243,7 +284,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int whence)
retval = offset;
}
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return retval;
}
EXPORT_SYMBOL(default_llseek);
@@ -395,9 +436,8 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t
}
if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
- retval = locks_mandatory_area(
- read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
- inode, file, pos, count);
+ retval = locks_mandatory_area(inode, file, pos, pos + count - 1,
+ read_write == READ ? F_RDLCK : F_WRLCK);
if (retval < 0)
return retval;
}
@@ -494,30 +534,6 @@ ssize_t __vfs_write(struct file *file, const char __user *p, size_t count,
}
EXPORT_SYMBOL(__vfs_write);
-vfs_readf_t vfs_readf(struct file *file)
-{
- const struct file_operations *fop = file->f_op;
-
- if (fop->read)
- return fop->read;
- if (fop->read_iter)
- return new_sync_read;
- return ERR_PTR(-ENOSYS);
-}
-EXPORT_SYMBOL_GPL(vfs_readf);
-
-vfs_writef_t vfs_writef(struct file *file)
-{
- const struct file_operations *fop = file->f_op;
-
- if (fop->write)
- return fop->write;
- if (fop->write_iter)
- return new_sync_write;
- return ERR_PTR(-ENOSYS);
-}
-EXPORT_SYMBOL_GPL(vfs_writef);
-
ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
{
mm_segment_t old_fs;
@@ -1351,3 +1367,304 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
return do_sendfile(out_fd, in_fd, NULL, count, 0);
}
#endif
+
+/**
+ * vfs_copy_file_range - copy a range of bytes between two open files
+ * @file_in:  source file; must be open for reading (FMODE_READ)
+ * @pos_in:   byte offset in @file_in at which to start reading
+ * @file_out: destination file; must be open for writing (FMODE_WRITE)
+ *            and must not have O_APPEND set
+ * @pos_out:  byte offset in @file_out at which to start writing
+ * @len:      number of bytes requested
+ * @flags:    must be zero
+ *
+ * copy_file_range() differs from regular file read and write in that it
+ * specifically allows returning partial success.  When it does so is up
+ * to the copy_file_range method.
+ *
+ * The destination's ->copy_file_range method is tried first.  If there is
+ * no such method, or it returns -EOPNOTSUPP, the copy falls back to
+ * do_splice_direct(), limited to MAX_RW_COUNT bytes per call.
+ *
+ * Return: the number of bytes copied (possibly fewer than @len, 0 when
+ * @len is 0), or a negative errno:
+ *   -EINVAL  @flags is non-zero, or either file is not a regular file
+ *   -EISDIR  either file is a directory
+ *   -EBADF   wrong open mode, or @file_out is O_APPEND
+ *   -EXDEV   the files are on different superblocks
+ * plus whatever rw_verify_area(), mnt_want_write_file() or the copy
+ * itself report.
+ */
+ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
+			    struct file *file_out, loff_t pos_out,
+			    size_t len, unsigned int flags)
+{
+	struct inode *inode_in = file_inode(file_in);
+	struct inode *inode_out = file_inode(file_out);
+	ssize_t ret;
+
+	if (flags != 0)
+		return -EINVAL;
+
+	/*
+	 * Only regular files make sense here; reject everything else up
+	 * front, the same way vfs_clone_file_range() does, instead of
+	 * handing directories, pipes or devices to the copy method or to
+	 * the splice fallback.
+	 */
+	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
+		return -EISDIR;
+	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+		return -EINVAL;
+
+	/* copy_file_range allows full ssize_t len, ignoring MAX_RW_COUNT */
+	ret = rw_verify_area(READ, file_in, &pos_in, len);
+	if (ret >= 0)
+		ret = rw_verify_area(WRITE, file_out, &pos_out, len);
+	if (ret < 0)
+		return ret;
+
+	if (!(file_in->f_mode & FMODE_READ) ||
+	    !(file_out->f_mode & FMODE_WRITE) ||
+	    (file_out->f_flags & O_APPEND))
+		return -EBADF;
+
+	/* this could be relaxed once a method supports cross-fs copies */
+	if (inode_in->i_sb != inode_out->i_sb)
+		return -EXDEV;
+
+	if (len == 0)
+		return 0;
+
+	ret = mnt_want_write_file(file_out);
+	if (ret)
+		return ret;
+
+	/* Prefer the filesystem's own method, fall back to splice. */
+	ret = -EOPNOTSUPP;
+	if (file_out->f_op->copy_file_range)
+		ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out,
+						      pos_out, len, flags);
+	if (ret == -EOPNOTSUPP)
+		ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
+				len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
+
+	/* Notify and account bytes only when something was actually copied. */
+	if (ret > 0) {
+		fsnotify_access(file_in);
+		add_rchar(current, ret);
+		fsnotify_modify(file_out);
+		add_wchar(current, ret);
+	}
+	/* The attempt is counted as one read and one write call regardless. */
+	inc_syscr(current);
+	inc_syscw(current);
+
+	mnt_drop_write_file(file_out);
+
+	return ret;
+}
+EXPORT_SYMBOL(vfs_copy_file_range);
+
+/*
+ * copy_file_range(2) entry point.
+ *
+ * Resolves both descriptors, picks each starting offset either from the
+ * user-supplied pointer (when non-NULL) or from the file's own f_pos,
+ * and hands the work to vfs_copy_file_range().  After a copy of at least
+ * one byte, both offsets are advanced by the number of bytes copied and
+ * written back to wherever they came from.
+ *
+ * Returns the value of vfs_copy_file_range(), -EBADF if either
+ * descriptor is invalid, or -EFAULT if an offset cannot be read from or
+ * written back to user space.
+ */
+SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
+		int, fd_out, loff_t __user *, off_out,
+		size_t, len, unsigned int, flags)
+{
+	struct fd src;
+	struct fd dst;
+	loff_t src_pos;
+	loff_t dst_pos;
+	ssize_t copied;
+
+	src = fdget(fd_in);
+	if (!src.file)
+		return -EBADF;
+
+	dst = fdget(fd_out);
+	if (!dst.file) {
+		copied = -EBADF;
+		goto put_src;
+	}
+
+	/* Any failure while fetching the start offsets is a bad pointer. */
+	copied = -EFAULT;
+
+	if (!off_in)
+		src_pos = src.file->f_pos;
+	else if (copy_from_user(&src_pos, off_in, sizeof(loff_t)))
+		goto put_both;
+
+	if (!off_out)
+		dst_pos = dst.file->f_pos;
+	else if (copy_from_user(&dst_pos, off_out, sizeof(loff_t)))
+		goto put_both;
+
+	copied = vfs_copy_file_range(src.file, src_pos, dst.file, dst_pos,
+				     len, flags);
+	if (copied <= 0)
+		goto put_both;
+
+	/* Advance both offsets before either one is written back. */
+	src_pos += copied;
+	dst_pos += copied;
+
+	if (!off_in)
+		src.file->f_pos = src_pos;
+	else if (copy_to_user(off_in, &src_pos, sizeof(loff_t)))
+		copied = -EFAULT;
+
+	if (!off_out)
+		dst.file->f_pos = dst_pos;
+	else if (copy_to_user(off_out, &dst_pos, sizeof(loff_t)))
+		copied = -EFAULT;
+
+put_both:
+	fdput(dst);
+put_src:
+	fdput(src);
+	return copied;
+}
+
+/*
+ * Validate a byte range of @file before a clone or dedupe operation.
+ *
+ * Rejects a negative @pos, or a range whose end (@pos + @len) is negative
+ * when viewed as loff_t, with -EINVAL.  If the inode has a lock context
+ * and mandatory locking applies, the range is checked against mandatory
+ * locks; a @len of zero is checked up to OFFSET_MAX.  Finally the LSM is
+ * asked for read or write permission depending on @write.
+ *
+ * Returns a negative errno from the failing check, otherwise the result
+ * of security_file_permission().
+ */
+static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
+{
+	struct inode *inode = file_inode(file);
+	int err;
+
+	if (unlikely(pos < 0 || (loff_t) (pos + len) < 0))
+		return -EINVAL;
+
+	if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
+		loff_t last = OFFSET_MAX;
+
+		if (len)
+			last = pos + len - 1;
+
+		err = locks_mandatory_area(inode, file, pos, last,
+					   write ? F_WRLCK : F_RDLCK);
+		if (err < 0)
+			return err;
+	}
+
+	return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
+}
+
+/*
+ * vfs_clone_file_range - ask the filesystem to clone a byte range
+ *
+ * Clones @len bytes at @pos_in of @file_in to @pos_out of @file_out via
+ * the source file's ->clone_file_range method.
+ *
+ * Checks, in order: both files share superblock and mount (-EXDEV),
+ * neither is a directory (-EISDIR), both are regular files (-EINVAL),
+ * open modes are suitable and the destination is not O_APPEND (-EBADF),
+ * the method exists (-EOPNOTSUPP), both ranges pass clone_verify_area(),
+ * and the source range does not run past the source's i_size (-EINVAL).
+ *
+ * Returns 0 on success, in which case access/modify notifications are
+ * sent, or a negative errno.
+ */
+int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+		struct file *file_out, loff_t pos_out, u64 len)
+{
+	struct inode *src = file_inode(file_in);
+	struct inode *dst = file_inode(file_out);
+	int err;
+
+	if (src->i_sb != dst->i_sb)
+		return -EXDEV;
+	if (file_in->f_path.mnt != file_out->f_path.mnt)
+		return -EXDEV;
+
+	/* Directories get their own errno, ahead of the generic type check. */
+	if (S_ISDIR(src->i_mode))
+		return -EISDIR;
+	if (S_ISDIR(dst->i_mode))
+		return -EISDIR;
+	if (!S_ISREG(src->i_mode))
+		return -EINVAL;
+	if (!S_ISREG(dst->i_mode))
+		return -EINVAL;
+
+	if (!(file_in->f_mode & FMODE_READ))
+		return -EBADF;
+	if (!(file_out->f_mode & FMODE_WRITE))
+		return -EBADF;
+	if (file_out->f_flags & O_APPEND)
+		return -EBADF;
+
+	if (!file_in->f_op->clone_file_range)
+		return -EOPNOTSUPP;
+
+	err = clone_verify_area(file_in, pos_in, len, false);
+	if (!err)
+		err = clone_verify_area(file_out, pos_out, len, true);
+	if (err)
+		return err;
+
+	if (pos_in + len > i_size_read(src))
+		return -EINVAL;
+
+	err = mnt_want_write_file(file_out);
+	if (err)
+		return err;
+
+	err = file_in->f_op->clone_file_range(file_in, pos_in,
+					      file_out, pos_out, len);
+	if (err == 0) {
+		fsnotify_access(file_in);
+		fsnotify_modify(file_out);
+	}
+
+	mnt_drop_write_file(file_out);
+	return err;
+}
+EXPORT_SYMBOL(vfs_clone_file_range);
+
+/**
+ * vfs_dedupe_file_range - deduplicate one source range against many files
+ * @file: source file; must be open for reading and be a regular file
+ * @same: request describing the source range (src_offset, src_length)
+ *        and dest_count destinations in @same->info[]
+ *
+ * For every destination the outcome is reported in that entry's
+ * info->status: FILE_DEDUPE_RANGE_SAME, FILE_DEDUPE_RANGE_DIFFERS (when
+ * the ->dedupe_file_range method returns -EBADE), or a negative errno.
+ * On success the method's return value is added to info->bytes_deduped.
+ *
+ * Per-destination failures are reported ONLY through info->status.  They
+ * never become the function's return value, whichever position the
+ * failing destination has in the array.
+ *
+ * The loop stops early if a fatal signal is pending; entries not yet
+ * processed keep their pre-formatted values (status SAME, 0 bytes).
+ *
+ * Return: 0 once the source has been validated, or a negative errno if
+ * the request itself is invalid: -EINVAL (source not readable, reserved
+ * fields set, or not a regular file), -EISDIR (source is a directory),
+ * or the error from clone_verify_area() on the source range.
+ */
+int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
+{
+	struct file_dedupe_range_info *info;
+	struct inode *src = file_inode(file);
+	u64 off;
+	u64 len;
+	int i;
+	int ret;
+	bool is_admin = capable(CAP_SYS_ADMIN);
+	u16 count = same->dest_count;
+
+	if (!(file->f_mode & FMODE_READ))
+		return -EINVAL;
+
+	if (same->reserved1 || same->reserved2)
+		return -EINVAL;
+
+	off = same->src_offset;
+	len = same->src_length;
+
+	if (S_ISDIR(src->i_mode))
+		return -EISDIR;
+
+	if (!S_ISREG(src->i_mode))
+		return -EINVAL;
+
+	ret = clone_verify_area(file, off, len, false);
+	if (ret < 0)
+		return ret;
+
+	/* pre-format output fields to sane values */
+	for (i = 0; i < count; i++) {
+		same->info[i].bytes_deduped = 0ULL;
+		same->info[i].status = FILE_DEDUPE_RANGE_SAME;
+	}
+
+	for (i = 0, info = same->info; i < count; i++, info++) {
+		struct fd dst_fd = fdget(info->dest_fd);
+		struct file *dst_file = dst_fd.file;
+		struct inode *dst;
+		loff_t dst_off;
+		ssize_t deduped;
+		int err;	/* per-destination; must not leak into the return value */
+
+		if (!dst_file) {
+			info->status = -EBADF;
+			goto next_loop;
+		}
+		dst = file_inode(dst_file);
+
+		err = mnt_want_write_file(dst_file);
+		if (err) {
+			info->status = err;
+			goto next_loop;
+		}
+
+		dst_off = info->dest_offset;
+		err = clone_verify_area(dst_file, dst_off, len, true);
+		if (err < 0) {
+			info->status = err;
+			goto next_file;
+		}
+
+		if (info->reserved) {
+			info->status = -EINVAL;
+		} else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
+			info->status = -EINVAL;
+		} else if (file->f_path.mnt != dst_file->f_path.mnt) {
+			info->status = -EXDEV;
+		} else if (S_ISDIR(dst->i_mode)) {
+			info->status = -EISDIR;
+		} else if (dst_file->f_op->dedupe_file_range == NULL) {
+			info->status = -EINVAL;
+		} else {
+			deduped = dst_file->f_op->dedupe_file_range(file, off,
+							len, dst_file,
+							dst_off);
+			/* -EBADE is the method's way of saying "contents differ" */
+			if (deduped == -EBADE)
+				info->status = FILE_DEDUPE_RANGE_DIFFERS;
+			else if (deduped < 0)
+				info->status = deduped;
+			else
+				info->bytes_deduped += deduped;
+		}
+
+next_file:
+		mnt_drop_write_file(dst_file);
+next_loop:
+		fdput(dst_fd);
+
+		if (fatal_signal_pending(current))
+			break;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(vfs_dedupe_file_range);
diff --git a/fs/readdir.c b/fs/readdir.c
index ced679179..e69ef3b79 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -44,7 +44,7 @@ int iterate_dir(struct file *file, struct dir_context *ctx)
fsnotify_access(file);
file_accessed(file);
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
out:
return res;
}
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 4a024e2ce..3abd40041 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -38,11 +38,11 @@ static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
if (err)
return err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
reiserfs_write_lock(inode->i_sb);
err = reiserfs_commit_for_inode(inode);
reiserfs_write_unlock(inode->i_sb);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (err < 0)
return err;
return 0;
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 96a1bcf33..9424a4ba9 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -158,7 +158,7 @@ static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end,
if (err)
return err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
BUG_ON(!S_ISREG(inode->i_mode));
err = sync_mapping_buffers(inode->i_mapping);
reiserfs_write_lock(inode->i_sb);
@@ -166,7 +166,7 @@ static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end,
reiserfs_write_unlock(inode->i_sb);
if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (barrier_done < 0)
return barrier_done;
return (err < 0) ? -EIO : 0;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 3d8e7e671..ae9e5b308 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1361,6 +1361,7 @@ static void init_inode(struct inode *inode, struct treepath *path)
inode->i_fop = &reiserfs_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &reiserfs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &reiserfs_address_space_operations;
} else {
inode->i_blocks = 0;
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 6ec8a30a0..036a1fc0a 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -224,7 +224,7 @@ out_unlock:
page_cache_release(page);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
reiserfs_write_unlock(inode->i_sb);
return retval;
}
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 9d6486d41..44c2bdced 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -618,12 +618,10 @@ static void release_buffer_page(struct buffer_head *bh)
static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
- char b[BDEVNAME_SIZE];
-
if (buffer_journaled(bh)) {
reiserfs_warning(NULL, "clm-2084",
- "pinned buffer %lu:%s sent to disk",
- bh->b_blocknr, bdevname(bh->b_bdev, b));
+ "pinned buffer %lu:%pg sent to disk",
+ bh->b_blocknr, bh->b_bdev);
}
if (uptodate)
set_buffer_uptodate(bh);
@@ -2387,11 +2385,10 @@ static int journal_read(struct super_block *sb)
int replay_count = 0;
int continue_replay = 1;
int ret;
- char b[BDEVNAME_SIZE];
cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(sb);
- reiserfs_info(sb, "checking transaction log (%s)\n",
- bdevname(journal->j_dev_bd, b));
+ reiserfs_info(sb, "checking transaction log (%pg)\n",
+ journal->j_dev_bd);
start = get_seconds();
/*
@@ -2651,8 +2648,8 @@ static int journal_init_dev(struct super_block *super,
set_blocksize(journal->j_dev_bd, super->s_blocksize);
reiserfs_info(super,
- "journal_init_dev: journal device: %s\n",
- bdevname(journal->j_dev_bd, b));
+ "journal_init_dev: journal device: %pg\n",
+ journal->j_dev_bd);
return 0;
}
@@ -2724,7 +2721,6 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
struct reiserfs_journal_header *jh;
struct reiserfs_journal *journal;
struct reiserfs_journal_list *jl;
- char b[BDEVNAME_SIZE];
int ret;
journal = SB_JOURNAL(sb) = vzalloc(sizeof(struct reiserfs_journal));
@@ -2794,10 +2790,10 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
&& (le32_to_cpu(jh->jh_journal.jp_journal_magic) !=
sb_jp_journal_magic(rs))) {
reiserfs_warning(sb, "sh-460",
- "journal header magic %x (device %s) does "
+ "journal header magic %x (device %pg) does "
"not match to magic found in super block %x",
jh->jh_journal.jp_journal_magic,
- bdevname(journal->j_dev_bd, b),
+ journal->j_dev_bd,
sb_jp_journal_magic(rs));
brelse(bhjh);
goto free_and_return;
@@ -2818,10 +2814,10 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
journal->j_max_trans_age = commit_max_age;
}
- reiserfs_info(sb, "journal params: device %s, size %u, "
+ reiserfs_info(sb, "journal params: device %pg, size %u, "
"journal first block %u, max trans len %u, max batch %u, "
"max commit age %u, max trans age %u\n",
- bdevname(journal->j_dev_bd, b),
+ journal->j_dev_bd,
SB_ONDISK_JOURNAL_SIZE(sb),
SB_ONDISK_JOURNAL_1st_BLOCK(sb),
journal->j_trans_max,
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 47f96988f..2a12d46d7 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -1170,6 +1170,7 @@ static int reiserfs_symlink(struct inode *parent_dir,
reiserfs_update_inode_transaction(parent_dir);
inode->i_op = &reiserfs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &reiserfs_address_space_operations;
retval = reiserfs_add_entry(&th, parent_dir, dentry->d_name.name,
@@ -1664,8 +1665,7 @@ const struct inode_operations reiserfs_dir_inode_operations = {
*/
const struct inode_operations reiserfs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.setattr = reiserfs_setattr,
.setxattr = reiserfs_setxattr,
.getxattr = reiserfs_getxattr,
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index ae1dc841d..4f3f92807 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -139,11 +139,9 @@ static void sprintf_block_head(char *buf, struct buffer_head *bh)
static void sprintf_buffer_head(char *buf, struct buffer_head *bh)
{
- char b[BDEVNAME_SIZE];
-
sprintf(buf,
- "dev %s, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)",
- bdevname(bh->b_bdev, b), bh->b_size,
+ "dev %pg, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)",
+ bh->b_bdev, bh->b_size,
(unsigned long long)bh->b_blocknr, atomic_read(&(bh->b_count)),
bh->b_state, bh->b_page,
buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE",
@@ -530,7 +528,6 @@ static int print_super_block(struct buffer_head *bh)
(struct reiserfs_super_block *)(bh->b_data);
int skipped, data_blocks;
char *version;
- char b[BDEVNAME_SIZE];
if (is_reiserfs_3_5(rs)) {
version = "3.5";
@@ -543,7 +540,7 @@ static int print_super_block(struct buffer_head *bh)
return 1;
}
- printk("%s\'s super block is in block %llu\n", bdevname(bh->b_bdev, b),
+ printk("%pg\'s super block is in block %llu\n", bh->b_bdev,
(unsigned long long)bh->b_blocknr);
printk("Reiserfs version %s\n", version);
printk("Block count %u\n", sb_block_count(rs));
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 621b9f381..fe999157d 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -303,11 +303,10 @@ static int show_journal(struct seq_file *m, void *unused)
struct reiserfs_sb_info *r = REISERFS_SB(sb);
struct reiserfs_super_block *rs = r->s_rs;
struct journal_params *jp = &rs->s_v1.s_journal;
- char b[BDEVNAME_SIZE];
seq_printf(m, /* on-disk fields */
"jp_journal_1st_block: \t%i\n"
- "jp_journal_dev: \t%s[%x]\n"
+ "jp_journal_dev: \t%pg[%x]\n"
"jp_journal_size: \t%i\n"
"jp_journal_trans_max: \t%i\n"
"jp_journal_magic: \t%i\n"
@@ -348,7 +347,7 @@ static int show_journal(struct seq_file *m, void *unused)
"prepare: \t%12lu\n"
"prepare_retry: \t%12lu\n",
DJP(jp_journal_1st_block),
- bdevname(SB_JOURNAL(sb)->j_dev_bd, b),
+ SB_JOURNAL(sb)->j_dev_bd,
DJP(jp_journal_dev),
DJP(jp_journal_size),
DJP(jp_journal_trans_max),
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 4a62fe8cc..c0306ec8e 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -288,7 +288,7 @@ static int finish_unfinished(struct super_block *s)
pathrelse(&path);
inode = reiserfs_iget(s, &obj_key);
- if (!inode) {
+ if (IS_ERR_OR_NULL(inode)) {
/*
* the unlink almost completed, it just did not
* manage to remove "save" link and release objectid
@@ -626,7 +626,8 @@ static int __init init_inodecache(void)
sizeof(struct
reiserfs_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|
+ SLAB_ACCOUNT),
init_once);
if (reiserfs_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 66b26fdff..57e0b2310 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -64,14 +64,14 @@
#ifdef CONFIG_REISERFS_FS_XATTR
static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
{
- BUG_ON(!mutex_is_locked(&dir->i_mutex));
+ BUG_ON(!inode_is_locked(dir));
return dir->i_op->create(dir, dentry, mode, true);
}
#endif
static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
- BUG_ON(!mutex_is_locked(&dir->i_mutex));
+ BUG_ON(!inode_is_locked(dir));
return dir->i_op->mkdir(dir, dentry, mode);
}
@@ -85,11 +85,11 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
{
int error;
- BUG_ON(!mutex_is_locked(&dir->i_mutex));
+ BUG_ON(!inode_is_locked(dir));
- mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
error = dir->i_op->unlink(dir, dentry);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
if (!error)
d_delete(dentry);
@@ -100,13 +100,13 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
{
int error;
- BUG_ON(!mutex_is_locked(&dir->i_mutex));
+ BUG_ON(!inode_is_locked(dir));
- mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
error = dir->i_op->rmdir(dir, dentry);
if (!error)
d_inode(dentry)->i_flags |= S_DEAD;
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
if (!error)
d_delete(dentry);
@@ -123,7 +123,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
if (d_really_is_negative(privroot))
return ERR_PTR(-ENODATA);
- mutex_lock_nested(&d_inode(privroot)->i_mutex, I_MUTEX_XATTR);
+ inode_lock_nested(d_inode(privroot), I_MUTEX_XATTR);
xaroot = dget(REISERFS_SB(sb)->xattr_root);
if (!xaroot)
@@ -139,7 +139,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
}
}
- mutex_unlock(&d_inode(privroot)->i_mutex);
+ inode_unlock(d_inode(privroot));
return xaroot;
}
@@ -156,7 +156,7 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
le32_to_cpu(INODE_PKEY(inode)->k_objectid),
inode->i_generation);
- mutex_lock_nested(&d_inode(xaroot)->i_mutex, I_MUTEX_XATTR);
+ inode_lock_nested(d_inode(xaroot), I_MUTEX_XATTR);
xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
if (!IS_ERR(xadir) && d_really_is_negative(xadir)) {
@@ -170,7 +170,7 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
}
}
- mutex_unlock(&d_inode(xaroot)->i_mutex);
+ inode_unlock(d_inode(xaroot));
dput(xaroot);
return xadir;
}
@@ -195,7 +195,7 @@ fill_with_dentries(struct dir_context *ctx, const char *name, int namelen,
container_of(ctx, struct reiserfs_dentry_buf, ctx);
struct dentry *dentry;
- WARN_ON_ONCE(!mutex_is_locked(&d_inode(dbuf->xadir)->i_mutex));
+ WARN_ON_ONCE(!inode_is_locked(d_inode(dbuf->xadir)));
if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
return -ENOSPC;
@@ -254,7 +254,7 @@ static int reiserfs_for_each_xattr(struct inode *inode,
goto out_dir;
}
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_XATTR);
+ inode_lock_nested(d_inode(dir), I_MUTEX_XATTR);
buf.xadir = dir;
while (1) {
@@ -276,7 +276,7 @@ static int reiserfs_for_each_xattr(struct inode *inode,
break;
buf.count = 0;
}
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
cleanup_dentry_buf(&buf);
@@ -298,13 +298,13 @@ static int reiserfs_for_each_xattr(struct inode *inode,
if (!err) {
int jerror;
- mutex_lock_nested(&d_inode(dir->d_parent)->i_mutex,
+ inode_lock_nested(d_inode(dir->d_parent),
I_MUTEX_XATTR);
err = action(dir, data);
reiserfs_write_lock(inode->i_sb);
jerror = journal_end(&th);
reiserfs_write_unlock(inode->i_sb);
- mutex_unlock(&d_inode(dir->d_parent)->i_mutex);
+ inode_unlock(d_inode(dir->d_parent));
err = jerror ?: err;
}
}
@@ -384,7 +384,7 @@ static struct dentry *xattr_lookup(struct inode *inode, const char *name,
if (IS_ERR(xadir))
return ERR_CAST(xadir);
- mutex_lock_nested(&d_inode(xadir)->i_mutex, I_MUTEX_XATTR);
+ inode_lock_nested(d_inode(xadir), I_MUTEX_XATTR);
xafile = lookup_one_len(name, xadir, strlen(name));
if (IS_ERR(xafile)) {
err = PTR_ERR(xafile);
@@ -404,7 +404,7 @@ static struct dentry *xattr_lookup(struct inode *inode, const char *name,
if (err)
dput(xafile);
out:
- mutex_unlock(&d_inode(xadir)->i_mutex);
+ inode_unlock(d_inode(xadir));
dput(xadir);
if (err)
return ERR_PTR(err);
@@ -469,7 +469,7 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
if (IS_ERR(xadir))
return PTR_ERR(xadir);
- mutex_lock_nested(&d_inode(xadir)->i_mutex, I_MUTEX_XATTR);
+ inode_lock_nested(d_inode(xadir), I_MUTEX_XATTR);
dentry = lookup_one_len(name, xadir, strlen(name));
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
@@ -483,7 +483,7 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
dput(dentry);
out_dput:
- mutex_unlock(&d_inode(xadir)->i_mutex);
+ inode_unlock(d_inode(xadir));
dput(xadir);
return err;
}
@@ -580,11 +580,11 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
.ia_valid = ATTR_SIZE | ATTR_CTIME,
};
- mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_XATTR);
+ inode_lock_nested(d_inode(dentry), I_MUTEX_XATTR);
inode_dio_wait(d_inode(dentry));
err = reiserfs_setattr(dentry, &newattrs);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
} else
update_ctime(inode);
out_unlock:
@@ -756,7 +756,8 @@ find_xattr_handler_prefix(const struct xattr_handler **handlers,
return NULL;
for_each_xattr_handler(handlers, xah) {
- if (strncmp(xah->prefix, name, strlen(xah->prefix)) == 0)
+ const char *prefix = xattr_prefix(xah);
+ if (strncmp(prefix, name, strlen(prefix)) == 0)
break;
}
@@ -839,19 +840,16 @@ static int listxattr_filler(struct dir_context *ctx, const char *name,
handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr,
name);
- if (!handler) /* Unsupported xattr name */
+ if (!handler /* Unsupported xattr name */ ||
+ (handler->list && !handler->list(b->dentry)))
return 0;
+ size = namelen + 1;
if (b->buf) {
- size = handler->list(handler, b->dentry,
- b->buf + b->pos, b->size, name,
- namelen);
if (size > b->size)
return -ERANGE;
- } else {
- size = handler->list(handler, b->dentry,
- NULL, 0, name, namelen);
+ memcpy(b->buf + b->pos, name, namelen);
+ b->buf[b->pos + namelen] = 0;
}
-
b->pos += size;
}
return 0;
@@ -890,9 +888,9 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
goto out;
}
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_XATTR);
+ inode_lock_nested(d_inode(dir), I_MUTEX_XATTR);
err = reiserfs_readdir_inode(d_inode(dir), &buf.ctx);
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
if (!err)
err = buf.pos;
@@ -907,7 +905,7 @@ static int create_privroot(struct dentry *dentry)
int err;
struct inode *inode = d_inode(dentry->d_parent);
- WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
+ WARN_ON_ONCE(!inode_is_locked(inode));
err = xattr_mkdir(inode, dentry, 0700);
if (err || d_really_is_negative(dentry)) {
@@ -997,7 +995,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
int err = 0;
/* If we don't have the privroot located yet - go find it */
- mutex_lock(&d_inode(s->s_root)->i_mutex);
+ inode_lock(d_inode(s->s_root));
dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
strlen(PRIVROOT_NAME));
if (!IS_ERR(dentry)) {
@@ -1007,7 +1005,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
d_inode(dentry)->i_flags |= S_PRIVATE;
} else
err = PTR_ERR(dentry);
- mutex_unlock(&d_inode(s->s_root)->i_mutex);
+ inode_unlock(d_inode(s->s_root));
return err;
}
@@ -1027,14 +1025,14 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
goto error;
if (d_really_is_negative(privroot) && !(mount_flags & MS_RDONLY)) {
- mutex_lock(&d_inode(s->s_root)->i_mutex);
+ inode_lock(d_inode(s->s_root));
err = create_privroot(REISERFS_SB(s)->priv_root);
- mutex_unlock(&d_inode(s->s_root)->i_mutex);
+ inode_unlock(d_inode(s->s_root));
}
if (d_really_is_positive(privroot)) {
s->s_xattr = reiserfs_xattr_handlers;
- mutex_lock(&d_inode(privroot)->i_mutex);
+ inode_lock(d_inode(privroot));
if (!REISERFS_SB(s)->xattr_root) {
struct dentry *dentry;
@@ -1045,7 +1043,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
else
err = PTR_ERR(dentry);
}
- mutex_unlock(&d_inode(privroot)->i_mutex);
+ inode_unlock(d_inode(privroot));
}
error:
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 4b34b9dc0..558a16bea 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -186,10 +186,10 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
switch (type) {
case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
- name = POSIX_ACL_XATTR_DEFAULT;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
BUG();
@@ -244,7 +244,7 @@ __reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
switch (type) {
case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
error = posix_acl_equiv_mode(acl, &inode->i_mode);
if (error < 0)
@@ -256,7 +256,7 @@ __reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
}
break;
case ACL_TYPE_DEFAULT:
- name = POSIX_ACL_XATTR_DEFAULT;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
if (!S_ISDIR(inode->i_mode))
return acl ? -EACCES : 0;
break;
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index ac659af43..ab0217d32 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -34,21 +34,9 @@ security_set(const struct xattr_handler *handler, struct dentry *dentry,
return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags);
}
-static size_t security_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_len,
- const char *name, size_t namelen)
+static bool security_list(struct dentry *dentry)
{
- const size_t len = namelen + 1;
-
- if (IS_PRIVATE(d_inode(dentry)))
- return 0;
-
- if (list && len <= list_len) {
- memcpy(list, name, namelen);
- list[namelen] = '\0';
- }
-
- return len;
+ return !IS_PRIVATE(d_inode(dentry));
}
/* Initializes the security context for a new inode and returns the number
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index a338adf1b..64b67aa64 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -33,20 +33,9 @@ trusted_set(const struct xattr_handler *handler, struct dentry *dentry,
return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags);
}
-static size_t trusted_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
+static bool trusted_list(struct dentry *dentry)
{
- const size_t len = name_len + 1;
-
- if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(d_inode(dentry)))
- return 0;
-
- if (list && len <= list_size) {
- memcpy(list, name, name_len);
- list[name_len] = '\0';
- }
- return len;
+ return capable(CAP_SYS_ADMIN) && !IS_PRIVATE(d_inode(dentry));
}
const struct xattr_handler reiserfs_xattr_trusted_handler = {
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 39c966719..12e6306f5 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -30,19 +30,9 @@ user_set(const struct xattr_handler *handler, struct dentry *dentry,
return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags);
}
-static size_t user_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
+static bool user_list(struct dentry *dentry)
{
- const size_t len = name_len + 1;
-
- if (!reiserfs_xattrs_user(dentry->d_sb))
- return 0;
- if (list && len <= list_size) {
- memcpy(list, name, name_len);
- list[name_len] = '\0';
- }
- return len;
+ return reiserfs_xattrs_user(dentry->d_sb);
}
const struct xattr_handler reiserfs_xattr_user_handler = {
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 268733cda..6b00ca357 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -360,6 +360,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
break;
case ROMFH_SYM:
i->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(i);
i->i_data.a_ops = &romfs_aops;
mode |= S_IRWXUGO;
break;
@@ -618,8 +619,8 @@ static int __init init_romfs_fs(void)
romfs_inode_cachep =
kmem_cache_create("romfs_i",
sizeof(struct romfs_inode_info), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
- romfs_i_init_once);
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD |
+ SLAB_ACCOUNT, romfs_i_init_once);
if (!romfs_inode_cachep) {
pr_err("Failed to initialise inode cache\n");
diff --git a/fs/select.c b/fs/select.c
index 015547330..79d0d4953 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -778,8 +778,8 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait,
return mask;
}
-static int do_poll(unsigned int nfds, struct poll_list *list,
- struct poll_wqueues *wait, struct timespec *end_time)
+static int do_poll(struct poll_list *list, struct poll_wqueues *wait,
+ struct timespec *end_time)
{
poll_table* pt = &wait->pt;
ktime_t expire, *to = NULL;
@@ -908,7 +908,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
}
poll_initwait(&table);
- fdcount = do_poll(nfds, head, &table, end_time);
+ fdcount = do_poll(head, &table, end_time);
poll_freewait(&table);
for (walk = head; walk; walk = walk->next) {
diff --git a/fs/splice.c b/fs/splice.c
index b0ade1fd4..82bc0d64f 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -415,6 +415,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
*/
if (!page->mapping) {
unlock_page(page);
+retry_lookup:
page = find_or_create_page(mapping, index,
mapping_gfp_mask(mapping));
@@ -439,13 +440,10 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
error = mapping->a_ops->readpage(in, page);
if (unlikely(error)) {
/*
- * We really should re-lookup the page here,
- * but it complicates things a lot. Instead
- * lets just do what we already stored, and
- * we'll get it the next time we are called.
+ * Re-lookup the page
*/
if (error == AOP_TRUNCATED_PAGE)
- error = 0;
+ goto retry_lookup;
break;
}
@@ -1110,8 +1108,8 @@ EXPORT_SYMBOL(generic_splice_sendpage);
/*
* Attempt to initiate a splice from pipe to file.
*/
-long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
- loff_t *ppos, size_t len, unsigned int flags)
+static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags)
{
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
loff_t *, size_t, unsigned int);
@@ -1123,14 +1121,13 @@ long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
return splice_write(pipe, out, ppos, len, flags);
}
-EXPORT_SYMBOL_GPL(do_splice_from);
/*
* Attempt to initiate a splice from a file to a pipe.
*/
-long do_splice_to(struct file *in, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t len,
- unsigned int flags)
+static long do_splice_to(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
{
ssize_t (*splice_read)(struct file *, loff_t *,
struct pipe_inode_info *, size_t, unsigned int);
@@ -1150,7 +1147,6 @@ long do_splice_to(struct file *in, loff_t *ppos,
return splice_read(in, ppos, pipe, len, flags);
}
-EXPORT_SYMBOL_GPL(do_splice_to);
/**
* splice_direct_to_actor - splices data directly between two non-pipes
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index a1ce5ce60..0927b1e80 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -41,6 +41,7 @@
#include <linux/fs.h>
#include <linux/vfs.h>
#include <linux/xattr.h>
+#include <linux/pagemap.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -291,6 +292,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
inode->i_size = le32_to_cpu(sqsh_ino->symlink_size);
inode->i_op = &squashfs_symlink_inode_ops;
+ inode_nohighmem(inode);
inode->i_data.a_ops = &squashfs_symlink_aops;
inode->i_mode |= S_IFLNK;
squashfs_i(inode)->start = block;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 5056babe0..5e79bfa4f 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -80,7 +80,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
{
struct squashfs_sb_info *msblk;
struct squashfs_super_block *sblk = NULL;
- char b[BDEVNAME_SIZE];
struct inode *root;
long long root_inode;
unsigned short flags;
@@ -124,8 +123,8 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_magic = le32_to_cpu(sblk->s_magic);
if (sb->s_magic != SQUASHFS_MAGIC) {
if (!silent)
- ERROR("Can't find a SQUASHFS superblock on %s\n",
- bdevname(sb->s_bdev, b));
+ ERROR("Can't find a SQUASHFS superblock on %pg\n",
+ sb->s_bdev);
goto failed_mount;
}
@@ -178,7 +177,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
msblk->inodes = le32_to_cpu(sblk->inodes);
flags = le16_to_cpu(sblk->flags);
- TRACE("Found valid superblock on %s\n", bdevname(sb->s_bdev, b));
+ TRACE("Found valid superblock on %pg\n", sb->s_bdev);
TRACE("Inodes are %scompressed\n", SQUASHFS_UNCOMPRESSED_INODES(flags)
? "un" : "");
TRACE("Data is %scompressed\n", SQUASHFS_UNCOMPRESSED_DATA(flags)
@@ -420,7 +419,8 @@ static int __init init_inodecache(void)
{
squashfs_inode_cachep = kmem_cache_create("squashfs_inode_cache",
sizeof(struct squashfs_inode_info), 0,
- SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, init_once);
+ SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT,
+ init_once);
return squashfs_inode_cachep ? 0 : -ENOMEM;
}
diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c
index 12806dffb..dbcc2f54b 100644
--- a/fs/squashfs/symlink.c
+++ b/fs/squashfs/symlink.c
@@ -119,8 +119,7 @@ const struct address_space_operations squashfs_symlink_aops = {
const struct inode_operations squashfs_symlink_inode_ops = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.getxattr = generic_getxattr,
.listxattr = squashfs_listxattr
};
diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c
index 6a4cc3440..1e9de9628 100644
--- a/fs/squashfs/xattr.c
+++ b/fs/squashfs/xattr.c
@@ -58,7 +58,7 @@ ssize_t squashfs_listxattr(struct dentry *d, char *buffer,
struct squashfs_xattr_entry entry;
struct squashfs_xattr_val val;
const struct xattr_handler *handler;
- int name_size, prefix_size = 0;
+ int name_size;
err = squashfs_read_metadata(sb, &entry, &start, &offset,
sizeof(entry));
@@ -67,15 +67,16 @@ ssize_t squashfs_listxattr(struct dentry *d, char *buffer,
name_size = le16_to_cpu(entry.size);
handler = squashfs_xattr_handler(le16_to_cpu(entry.type));
- if (handler)
- prefix_size = handler->list(handler, d, buffer, rest,
- NULL, name_size);
- if (prefix_size) {
+ if (handler && (!handler->list || handler->list(d))) {
+ const char *prefix = handler->prefix ?: handler->name;
+ size_t prefix_size = strlen(prefix);
+
if (buffer) {
if (prefix_size + name_size + 1 > rest) {
err = -ERANGE;
goto failed;
}
+ memcpy(buffer, prefix, prefix_size);
buffer += prefix_size;
}
err = squashfs_read_metadata(sb, buffer, &start,
@@ -212,25 +213,10 @@ failed:
}
-static size_t squashfs_xattr_handler_list(const struct xattr_handler *handler,
- struct dentry *d, char *list,
- size_t list_size, const char *name,
- size_t name_len)
-{
- int len = strlen(handler->prefix);
-
- if (list && len <= list_size)
- memcpy(list, handler->prefix, len);
- return len;
-}
-
static int squashfs_xattr_handler_get(const struct xattr_handler *handler,
struct dentry *d, const char *name,
void *buffer, size_t size)
{
- if (name[0] == '\0')
- return -EINVAL;
-
return squashfs_xattr_get(d_inode(d), handler->flags, name,
buffer, size);
}
@@ -241,22 +227,15 @@ static int squashfs_xattr_handler_get(const struct xattr_handler *handler,
static const struct xattr_handler squashfs_xattr_user_handler = {
.prefix = XATTR_USER_PREFIX,
.flags = SQUASHFS_XATTR_USER,
- .list = squashfs_xattr_handler_list,
.get = squashfs_xattr_handler_get
};
/*
* Trusted namespace support
*/
-static size_t squashfs_trusted_xattr_handler_list(const struct xattr_handler *handler,
- struct dentry *d, char *list,
- size_t list_size, const char *name,
- size_t name_len)
+static bool squashfs_trusted_xattr_handler_list(struct dentry *d)
{
- if (!capable(CAP_SYS_ADMIN))
- return 0;
- return squashfs_xattr_handler_list(handler, d, list, list_size, name,
- name_len);
+ return capable(CAP_SYS_ADMIN);
}
static const struct xattr_handler squashfs_xattr_trusted_handler = {
@@ -272,7 +251,6 @@ static const struct xattr_handler squashfs_xattr_trusted_handler = {
static const struct xattr_handler squashfs_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.flags = SQUASHFS_XATTR_SECURITY,
- .list = squashfs_xattr_handler_list,
.get = squashfs_xattr_handler_get
};
diff --git a/fs/stat.c b/fs/stat.c
index d4a61d8dc..bc045c799 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -219,7 +219,7 @@ SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, stat
# define choose_32_64(a,b) b
#endif
-#define valid_dev(x) choose_32_64(old_valid_dev,new_valid_dev)(x)
+#define valid_dev(x) choose_32_64(old_valid_dev(x),true)
#define encode_dev(x) choose_32_64(old_encode_dev,new_encode_dev)(x)
#ifndef INIT_STRUCT_STAT_PADDING
diff --git a/fs/super.c b/fs/super.c
index 6746d6087..74914b1ba 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -36,7 +36,7 @@
#include "internal.h"
-LIST_HEAD(super_blocks);
+static LIST_HEAD(super_blocks);
static DEFINE_SPINLOCK(sb_lock);
static char *sb_writers_name[SB_FREEZE_LEVELS] = {
@@ -1013,10 +1013,8 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
blkdev_put(bdev, mode);
down_write(&s->s_umount);
} else {
- char b[BDEVNAME_SIZE];
-
s->s_mode = mode;
- strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
+ snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
sb_set_blocksize(s, block_size(bdev));
error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
if (error) {
@@ -1200,7 +1198,7 @@ int __sb_start_write(struct super_block *sb, int level, bool wait)
else
ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1);
- WARN_ON(force_trylock & !ret);
+ WARN_ON(force_trylock && !ret);
return ret;
}
EXPORT_SYMBOL(__sb_start_write);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 02fa1dcc5..d62c423a5 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -146,8 +146,7 @@ static inline void write3byte(struct sysv_sb_info *sbi,
static const struct inode_operations sysv_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.getattr = sysv_getattr,
};
@@ -163,6 +162,7 @@ void sysv_set_inode(struct inode *inode, dev_t rdev)
inode->i_mapping->a_ops = &sysv_aops;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &sysv_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &sysv_aops;
} else
init_special_inode(inode, inode->i_mode, rdev);
@@ -346,7 +346,7 @@ int __init sysv_init_icache(void)
{
sysv_inode_cachep = kmem_cache_create("sysv_inode_cache",
sizeof(struct sysv_inode_info), 0,
- SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+ SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT,
init_once);
if (!sysv_inode_cachep)
return -ENOMEM;
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index c66f2423e..4a0e48f92 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -84,9 +84,9 @@ static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umo
* the files within the tracefs system. It is up to the individual
* mkdir routine to handle races.
*/
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
ret = tracefs_ops.mkdir(name);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
kfree(name);
@@ -109,13 +109,13 @@ static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry)
* This time we need to unlock not only the parent (inode) but
* also the directory that is being deleted.
*/
- mutex_unlock(&inode->i_mutex);
- mutex_unlock(&dentry->d_inode->i_mutex);
+ inode_unlock(inode);
+ inode_unlock(dentry->d_inode);
ret = tracefs_ops.rmdir(name);
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock(&dentry->d_inode->i_mutex);
+ inode_lock_nested(inode, I_MUTEX_PARENT);
+ inode_lock(dentry->d_inode);
kfree(name);
@@ -334,7 +334,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
if (!parent)
parent = tracefs_mount->mnt_root;
- mutex_lock(&parent->d_inode->i_mutex);
+ inode_lock(parent->d_inode);
dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(dentry) && dentry->d_inode) {
dput(dentry);
@@ -342,7 +342,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
}
if (IS_ERR(dentry)) {
- mutex_unlock(&parent->d_inode->i_mutex);
+ inode_unlock(parent->d_inode);
simple_release_fs(&tracefs_mount, &tracefs_mount_count);
}
@@ -351,7 +351,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
static struct dentry *failed_creating(struct dentry *dentry)
{
- mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+ inode_unlock(dentry->d_parent->d_inode);
dput(dentry);
simple_release_fs(&tracefs_mount, &tracefs_mount_count);
return NULL;
@@ -359,7 +359,7 @@ static struct dentry *failed_creating(struct dentry *dentry)
static struct dentry *end_creating(struct dentry *dentry)
{
- mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+ inode_unlock(dentry->d_parent->d_inode);
return dentry;
}
@@ -544,9 +544,9 @@ void tracefs_remove(struct dentry *dentry)
if (!parent || !parent->d_inode)
return;
- mutex_lock(&parent->d_inode->i_mutex);
+ inode_lock(parent->d_inode);
ret = __tracefs_remove(dentry, parent);
- mutex_unlock(&parent->d_inode->i_mutex);
+ inode_unlock(parent->d_inode);
if (!ret)
simple_release_fs(&tracefs_mount, &tracefs_mount_count);
}
@@ -572,7 +572,7 @@ void tracefs_remove_recursive(struct dentry *dentry)
parent = dentry;
down:
- mutex_lock(&parent->d_inode->i_mutex);
+ inode_lock(parent->d_inode);
loop:
/*
* The parent->d_subdirs is protected by the d_lock. Outside that
@@ -587,7 +587,7 @@ void tracefs_remove_recursive(struct dentry *dentry)
/* perhaps simple_empty(child) makes more sense */
if (!list_empty(&child->d_subdirs)) {
spin_unlock(&parent->d_lock);
- mutex_unlock(&parent->d_inode->i_mutex);
+ inode_unlock(parent->d_inode);
parent = child;
goto down;
}
@@ -608,10 +608,10 @@ void tracefs_remove_recursive(struct dentry *dentry)
}
spin_unlock(&parent->d_lock);
- mutex_unlock(&parent->d_inode->i_mutex);
+ inode_unlock(parent->d_inode);
child = parent;
parent = parent->d_parent;
- mutex_lock(&parent->d_inode->i_mutex);
+ inode_lock(parent->d_inode);
if (child != dentry)
/* go up */
@@ -619,7 +619,7 @@ void tracefs_remove_recursive(struct dentry *dentry)
if (!__tracefs_remove(child, parent))
simple_release_fs(&tracefs_mount, &tracefs_mount_count);
- mutex_unlock(&parent->d_inode->i_mutex);
+ inode_unlock(parent->d_inode);
}
/**
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index e49bd2808..795992a83 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -515,8 +515,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
dbg_gen("dent '%pd' to ino %lu (nlink %d) in dir ino %lu",
dentry, inode->i_ino,
inode->i_nlink, dir->i_ino);
- ubifs_assert(mutex_is_locked(&dir->i_mutex));
- ubifs_assert(mutex_is_locked(&inode->i_mutex));
+ ubifs_assert(inode_is_locked(dir));
+ ubifs_assert(inode_is_locked(inode));
err = dbg_check_synced_i_size(c, inode);
if (err)
@@ -572,8 +572,8 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
dbg_gen("dent '%pd' from ino %lu (nlink %d) in dir ino %lu",
dentry, inode->i_ino,
inode->i_nlink, dir->i_ino);
- ubifs_assert(mutex_is_locked(&dir->i_mutex));
- ubifs_assert(mutex_is_locked(&inode->i_mutex));
+ ubifs_assert(inode_is_locked(dir));
+ ubifs_assert(inode_is_locked(inode));
err = dbg_check_synced_i_size(c, inode);
if (err)
return err;
@@ -661,8 +661,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
dbg_gen("directory '%pd', ino %lu in dir ino %lu", dentry,
inode->i_ino, dir->i_ino);
- ubifs_assert(mutex_is_locked(&dir->i_mutex));
- ubifs_assert(mutex_is_locked(&inode->i_mutex));
+ ubifs_assert(inode_is_locked(dir));
+ ubifs_assert(inode_is_locked(inode));
err = check_dir_empty(c, d_inode(dentry));
if (err)
return err;
@@ -996,10 +996,10 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu",
old_dentry, old_inode->i_ino, old_dir->i_ino,
new_dentry, new_dir->i_ino);
- ubifs_assert(mutex_is_locked(&old_dir->i_mutex));
- ubifs_assert(mutex_is_locked(&new_dir->i_mutex));
+ ubifs_assert(inode_is_locked(old_dir));
+ ubifs_assert(inode_is_locked(new_dir));
if (unlink)
- ubifs_assert(mutex_is_locked(&new_inode->i_mutex));
+ ubifs_assert(inode_is_locked(new_inode));
if (unlink && is_dir) {
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 0edc12856..065c88f8e 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1317,7 +1317,7 @@ int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
err = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (err)
return err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* Synchronize the inode unless this is a 'datasync()' call. */
if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) {
@@ -1332,7 +1332,7 @@ int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
*/
err = ubifs_sync_wbufs_by_inode(c, inode);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
@@ -1608,7 +1608,7 @@ const struct inode_operations ubifs_file_inode_operations = {
const struct inode_operations ubifs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
.setattr = ubifs_setattr,
.getattr = ubifs_getattr,
.setxattr = ubifs_setxattr,
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
index 92a8491a8..c0a95e393 100644
--- a/fs/ubifs/key.h
+++ b/fs/ubifs/key.h
@@ -34,6 +34,12 @@
* node. We use "r5" hash borrowed from reiserfs.
*/
+/*
+ * Lots of the key helpers take a struct ubifs_info *c as the first parameter,
+ * although it is currently unused. It is reserved for future extensions that use a
+ * different c->key_format; right now there is only one key type, UBIFS_SIMPLE_KEY_FMT.
+ */
+
#ifndef __UBIFS_KEY_H__
#define __UBIFS_KEY_H__
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 1fd90c079..a233ba913 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2248,8 +2248,8 @@ static int __init ubifs_init(void)
ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab",
sizeof(struct ubifs_inode), 0,
- SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT,
- &inode_slab_ctor);
+ SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT |
+ SLAB_ACCOUNT, &inode_slab_ctor);
if (!ubifs_inode_slab)
return -ENOMEM;
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index e8b01b721..c7f4d434d 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -267,7 +267,7 @@ static int check_namespace(const struct qstr *nm)
if (!strncmp(nm->name, XATTR_TRUSTED_PREFIX,
XATTR_TRUSTED_PREFIX_LEN)) {
- if (nm->name[sizeof(XATTR_TRUSTED_PREFIX) - 1] == '\0')
+ if (nm->name[XATTR_TRUSTED_PREFIX_LEN] == '\0')
return -EINVAL;
type = TRUSTED_XATTR;
} else if (!strncmp(nm->name, XATTR_USER_PREFIX,
@@ -277,7 +277,7 @@ static int check_namespace(const struct qstr *nm)
type = USER_XATTR;
} else if (!strncmp(nm->name, XATTR_SECURITY_PREFIX,
XATTR_SECURITY_PREFIX_LEN)) {
- if (nm->name[sizeof(XATTR_SECURITY_PREFIX) - 1] == '\0')
+ if (nm->name[XATTR_SECURITY_PREFIX_LEN] == '\0')
return -EINVAL;
type = SECURITY_XATTR;
} else
@@ -313,7 +313,7 @@ static int setxattr(struct inode *host, const char *name, const void *value,
union ubifs_key key;
int err, type;
- ubifs_assert(mutex_is_locked(&host->i_mutex));
+ ubifs_assert(inode_is_locked(host));
if (size > UBIFS_MAX_INO_DATA)
return -ERANGE;
@@ -550,7 +550,7 @@ int ubifs_removexattr(struct dentry *dentry, const char *name)
dbg_gen("xattr '%s', ino %lu ('%pd')", name,
host->i_ino, dentry);
- ubifs_assert(mutex_is_locked(&host->i_mutex));
+ ubifs_assert(inode_is_locked(host));
err = check_namespace(&nm);
if (err < 0)
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 6d6a96b4e..e0fd65fe7 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -447,9 +447,6 @@ static void udf_table_free_blocks(struct super_block *sb,
*/
int adsize;
- struct short_ad *sad = NULL;
- struct long_ad *lad = NULL;
- struct allocExtDesc *aed;
eloc.logicalBlockNum = start;
elen = EXT_RECORDED_ALLOCATED |
@@ -466,102 +463,17 @@ static void udf_table_free_blocks(struct super_block *sb,
}
if (epos.offset + (2 * adsize) > sb->s_blocksize) {
- unsigned char *sptr, *dptr;
- int loffset;
-
- brelse(oepos.bh);
- oepos = epos;
-
/* Steal a block from the extent being free'd */
- epos.block.logicalBlockNum = eloc.logicalBlockNum;
+ udf_setup_indirect_aext(table, eloc.logicalBlockNum,
+ &epos);
+
eloc.logicalBlockNum++;
elen -= sb->s_blocksize;
-
- epos.bh = udf_tread(sb,
- udf_get_lb_pblock(sb, &epos.block, 0));
- if (!epos.bh) {
- brelse(oepos.bh);
- goto error_return;
- }
- aed = (struct allocExtDesc *)(epos.bh->b_data);
- aed->previousAllocExtLocation =
- cpu_to_le32(oepos.block.logicalBlockNum);
- if (epos.offset + adsize > sb->s_blocksize) {
- loffset = epos.offset;
- aed->lengthAllocDescs = cpu_to_le32(adsize);
- sptr = iinfo->i_ext.i_data + epos.offset
- - adsize;
- dptr = epos.bh->b_data +
- sizeof(struct allocExtDesc);
- memcpy(dptr, sptr, adsize);
- epos.offset = sizeof(struct allocExtDesc) +
- adsize;
- } else {
- loffset = epos.offset + adsize;
- aed->lengthAllocDescs = cpu_to_le32(0);
- if (oepos.bh) {
- sptr = oepos.bh->b_data + epos.offset;
- aed = (struct allocExtDesc *)
- oepos.bh->b_data;
- le32_add_cpu(&aed->lengthAllocDescs,
- adsize);
- } else {
- sptr = iinfo->i_ext.i_data +
- epos.offset;
- iinfo->i_lenAlloc += adsize;
- mark_inode_dirty(table);
- }
- epos.offset = sizeof(struct allocExtDesc);
- }
- if (sbi->s_udfrev >= 0x0200)
- udf_new_tag(epos.bh->b_data, TAG_IDENT_AED,
- 3, 1, epos.block.logicalBlockNum,
- sizeof(struct tag));
- else
- udf_new_tag(epos.bh->b_data, TAG_IDENT_AED,
- 2, 1, epos.block.logicalBlockNum,
- sizeof(struct tag));
-
- switch (iinfo->i_alloc_type) {
- case ICBTAG_FLAG_AD_SHORT:
- sad = (struct short_ad *)sptr;
- sad->extLength = cpu_to_le32(
- EXT_NEXT_EXTENT_ALLOCDECS |
- sb->s_blocksize);
- sad->extPosition =
- cpu_to_le32(epos.block.logicalBlockNum);
- break;
- case ICBTAG_FLAG_AD_LONG:
- lad = (struct long_ad *)sptr;
- lad->extLength = cpu_to_le32(
- EXT_NEXT_EXTENT_ALLOCDECS |
- sb->s_blocksize);
- lad->extLocation =
- cpu_to_lelb(epos.block);
- break;
- }
- if (oepos.bh) {
- udf_update_tag(oepos.bh->b_data, loffset);
- mark_buffer_dirty(oepos.bh);
- } else {
- mark_inode_dirty(table);
- }
}
/* It's possible that stealing the block emptied the extent */
- if (elen) {
- udf_write_aext(table, &epos, &eloc, elen, 1);
-
- if (!epos.bh) {
- iinfo->i_lenAlloc += adsize;
- mark_inode_dirty(table);
- } else {
- aed = (struct allocExtDesc *)epos.bh->b_data;
- le32_add_cpu(&aed->lengthAllocDescs, adsize);
- udf_update_tag(epos.bh->b_data, epos.offset);
- mark_buffer_dirty(epos.bh);
- }
- }
+ if (elen)
+ __udf_add_aext(table, &epos, &eloc, elen, 1);
}
brelse(epos.bh);
diff --git a/fs/udf/file.c b/fs/udf/file.c
index bddf3d071..1af98963d 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -122,7 +122,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct udf_inode_info *iinfo = UDF_I(inode);
int err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
retval = generic_write_checks(iocb, from);
if (retval <= 0)
@@ -136,7 +136,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
(udf_file_entry_alloc_offset(inode) + end)) {
err = udf_expand_file_adinicb(inode);
if (err) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
udf_debug("udf_expand_adinicb: err=%d\n", err);
return err;
}
@@ -149,7 +149,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
retval = __generic_file_write_iter(iocb, from);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (retval > 0) {
mark_inode_dirty(inode);
@@ -223,12 +223,12 @@ static int udf_release_file(struct inode *inode, struct file *filp)
* Grab i_mutex to avoid races with writes changing i_size
* while we are running.
*/
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
down_write(&UDF_I(inode)->i_data_sem);
udf_discard_prealloc(inode);
udf_truncate_tail_extent(inode);
up_write(&UDF_I(inode)->i_data_sem);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return 0;
}
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 566df9b5a..166d3ed32 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -262,7 +262,7 @@ int udf_expand_file_adinicb(struct inode *inode)
.nr_to_write = 1,
};
- WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
+ WARN_ON_ONCE(!inode_is_locked(inode));
if (!iinfo->i_lenAlloc) {
if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD))
iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT;
@@ -539,9 +539,18 @@ static int udf_do_extend_file(struct inode *inode,
udf_add_aext(inode, last_pos, &last_ext->extLocation,
last_ext->extLength, 1);
count++;
- } else
+ } else {
+ struct kernel_lb_addr tmploc;
+ uint32_t tmplen;
+
udf_write_aext(inode, last_pos, &last_ext->extLocation,
last_ext->extLength, 1);
+ /*
+ * We've rewritten the last extent but there may be an empty
+ * indirect extent after it - enter it.
+ */
+ udf_next_aext(inode, last_pos, &tmploc, &tmplen, 0);
+ }
/* Managed to do everything necessary? */
if (!blocks)
@@ -1540,7 +1549,8 @@ reread:
break;
case ICBTAG_FILE_TYPE_SYMLINK:
inode->i_data.a_ops = &udf_symlink_aops;
- inode->i_op = &udf_symlink_inode_operations;
+ inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mode = S_IFLNK | S_IRWXUGO;
break;
case ICBTAG_FILE_TYPE_MAIN:
@@ -1866,22 +1876,90 @@ struct inode *__udf_iget(struct super_block *sb, struct kernel_lb_addr *ino,
return inode;
}
-int udf_add_aext(struct inode *inode, struct extent_position *epos,
- struct kernel_lb_addr *eloc, uint32_t elen, int inc)
+int udf_setup_indirect_aext(struct inode *inode, int block,
+ struct extent_position *epos)
{
- int adsize;
- struct short_ad *sad = NULL;
- struct long_ad *lad = NULL;
+ struct super_block *sb = inode->i_sb;
+ struct buffer_head *bh;
struct allocExtDesc *aed;
- uint8_t *ptr;
- struct udf_inode_info *iinfo = UDF_I(inode);
+ struct extent_position nepos;
+ struct kernel_lb_addr neloc;
+ int ver, adsize;
- if (!epos->bh)
- ptr = iinfo->i_ext.i_data + epos->offset -
- udf_file_entry_alloc_offset(inode) +
- iinfo->i_lenEAttr;
+ if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
+ adsize = sizeof(struct short_ad);
+ else if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_LONG)
+ adsize = sizeof(struct long_ad);
else
- ptr = epos->bh->b_data + epos->offset;
+ return -EIO;
+
+ neloc.logicalBlockNum = block;
+ neloc.partitionReferenceNum = epos->block.partitionReferenceNum;
+
+ bh = udf_tgetblk(sb, udf_get_lb_pblock(sb, &neloc, 0));
+ if (!bh)
+ return -EIO;
+ lock_buffer(bh);
+ memset(bh->b_data, 0x00, sb->s_blocksize);
+ set_buffer_uptodate(bh);
+ unlock_buffer(bh);
+ mark_buffer_dirty_inode(bh, inode);
+
+ aed = (struct allocExtDesc *)(bh->b_data);
+ if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT)) {
+ aed->previousAllocExtLocation =
+ cpu_to_le32(epos->block.logicalBlockNum);
+ }
+ aed->lengthAllocDescs = cpu_to_le32(0);
+ if (UDF_SB(sb)->s_udfrev >= 0x0200)
+ ver = 3;
+ else
+ ver = 2;
+ udf_new_tag(bh->b_data, TAG_IDENT_AED, ver, 1, block,
+ sizeof(struct tag));
+
+ nepos.block = neloc;
+ nepos.offset = sizeof(struct allocExtDesc);
+ nepos.bh = bh;
+
+ /*
+ * Do we have to copy current last extent to make space for indirect
+ * one?
+ */
+ if (epos->offset + adsize > sb->s_blocksize) {
+ struct kernel_lb_addr cp_loc;
+ uint32_t cp_len;
+ int cp_type;
+
+ epos->offset -= adsize;
+ cp_type = udf_current_aext(inode, epos, &cp_loc, &cp_len, 0);
+ cp_len |= ((uint32_t)cp_type) << 30;
+
+ __udf_add_aext(inode, &nepos, &cp_loc, cp_len, 1);
+ udf_write_aext(inode, epos, &nepos.block,
+ sb->s_blocksize | EXT_NEXT_EXTENT_ALLOCDECS, 0);
+ } else {
+ __udf_add_aext(inode, epos, &nepos.block,
+ sb->s_blocksize | EXT_NEXT_EXTENT_ALLOCDECS, 0);
+ }
+
+ brelse(epos->bh);
+ *epos = nepos;
+
+ return 0;
+}
+
+/*
+ * Append extent at the given position - should be the first free one in inode
+ * / indirect extent. This function assumes there is enough space in the inode
+ * or indirect extent. Use udf_add_aext() if you didn't check for this before.
+ */
+int __udf_add_aext(struct inode *inode, struct extent_position *epos,
+ struct kernel_lb_addr *eloc, uint32_t elen, int inc)
+{
+ struct udf_inode_info *iinfo = UDF_I(inode);
+ struct allocExtDesc *aed;
+ int adsize;
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
adsize = sizeof(struct short_ad);
@@ -1890,88 +1968,14 @@ int udf_add_aext(struct inode *inode, struct extent_position *epos,
else
return -EIO;
- if (epos->offset + (2 * adsize) > inode->i_sb->s_blocksize) {
- unsigned char *sptr, *dptr;
- struct buffer_head *nbh;
- int err, loffset;
- struct kernel_lb_addr obloc = epos->block;
-
- epos->block.logicalBlockNum = udf_new_block(inode->i_sb, NULL,
- obloc.partitionReferenceNum,
- obloc.logicalBlockNum, &err);
- if (!epos->block.logicalBlockNum)
- return -ENOSPC;
- nbh = udf_tgetblk(inode->i_sb, udf_get_lb_pblock(inode->i_sb,
- &epos->block,
- 0));
- if (!nbh)
- return -EIO;
- lock_buffer(nbh);
- memset(nbh->b_data, 0x00, inode->i_sb->s_blocksize);
- set_buffer_uptodate(nbh);
- unlock_buffer(nbh);
- mark_buffer_dirty_inode(nbh, inode);
-
- aed = (struct allocExtDesc *)(nbh->b_data);
- if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT))
- aed->previousAllocExtLocation =
- cpu_to_le32(obloc.logicalBlockNum);
- if (epos->offset + adsize > inode->i_sb->s_blocksize) {
- loffset = epos->offset;
- aed->lengthAllocDescs = cpu_to_le32(adsize);
- sptr = ptr - adsize;
- dptr = nbh->b_data + sizeof(struct allocExtDesc);
- memcpy(dptr, sptr, adsize);
- epos->offset = sizeof(struct allocExtDesc) + adsize;
- } else {
- loffset = epos->offset + adsize;
- aed->lengthAllocDescs = cpu_to_le32(0);
- sptr = ptr;
- epos->offset = sizeof(struct allocExtDesc);
-
- if (epos->bh) {
- aed = (struct allocExtDesc *)epos->bh->b_data;
- le32_add_cpu(&aed->lengthAllocDescs, adsize);
- } else {
- iinfo->i_lenAlloc += adsize;
- mark_inode_dirty(inode);
- }
- }
- if (UDF_SB(inode->i_sb)->s_udfrev >= 0x0200)
- udf_new_tag(nbh->b_data, TAG_IDENT_AED, 3, 1,
- epos->block.logicalBlockNum, sizeof(struct tag));
- else
- udf_new_tag(nbh->b_data, TAG_IDENT_AED, 2, 1,
- epos->block.logicalBlockNum, sizeof(struct tag));
- switch (iinfo->i_alloc_type) {
- case ICBTAG_FLAG_AD_SHORT:
- sad = (struct short_ad *)sptr;
- sad->extLength = cpu_to_le32(EXT_NEXT_EXTENT_ALLOCDECS |
- inode->i_sb->s_blocksize);
- sad->extPosition =
- cpu_to_le32(epos->block.logicalBlockNum);
- break;
- case ICBTAG_FLAG_AD_LONG:
- lad = (struct long_ad *)sptr;
- lad->extLength = cpu_to_le32(EXT_NEXT_EXTENT_ALLOCDECS |
- inode->i_sb->s_blocksize);
- lad->extLocation = cpu_to_lelb(epos->block);
- memset(lad->impUse, 0x00, sizeof(lad->impUse));
- break;
- }
- if (epos->bh) {
- if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) ||
- UDF_SB(inode->i_sb)->s_udfrev >= 0x0201)
- udf_update_tag(epos->bh->b_data, loffset);
- else
- udf_update_tag(epos->bh->b_data,
- sizeof(struct allocExtDesc));
- mark_buffer_dirty_inode(epos->bh, inode);
- brelse(epos->bh);
- } else {
- mark_inode_dirty(inode);
- }
- epos->bh = nbh;
+ if (!epos->bh) {
+ WARN_ON(iinfo->i_lenAlloc !=
+ epos->offset - udf_file_entry_alloc_offset(inode));
+ } else {
+ aed = (struct allocExtDesc *)epos->bh->b_data;
+ WARN_ON(le32_to_cpu(aed->lengthAllocDescs) !=
+ epos->offset - sizeof(struct allocExtDesc));
+ WARN_ON(epos->offset + adsize > inode->i_sb->s_blocksize);
}
udf_write_aext(inode, epos, eloc, elen, inc);
@@ -1995,6 +1999,41 @@ int udf_add_aext(struct inode *inode, struct extent_position *epos,
return 0;
}
+/*
+ * Append extent at given position - should be the first free one in inode
+ * / indirect extent. Takes care of allocating and linking indirect blocks.
+ */
+int udf_add_aext(struct inode *inode, struct extent_position *epos,
+ struct kernel_lb_addr *eloc, uint32_t elen, int inc)
+{
+ int adsize;
+ struct super_block *sb = inode->i_sb;
+
+ if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
+ adsize = sizeof(struct short_ad);
+ else if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_LONG)
+ adsize = sizeof(struct long_ad);
+ else
+ return -EIO;
+
+ if (epos->offset + (2 * adsize) > sb->s_blocksize) {
+ int err;
+ int new_block;
+
+ new_block = udf_new_block(sb, NULL,
+ epos->block.partitionReferenceNum,
+ epos->block.logicalBlockNum, &err);
+ if (!new_block)
+ return -ENOSPC;
+
+ err = udf_setup_indirect_aext(inode, new_block, epos);
+ if (err)
+ return err;
+ }
+
+ return __udf_add_aext(inode, epos, eloc, elen, inc);
+}
+
void udf_write_aext(struct inode *inode, struct extent_position *epos,
struct kernel_lb_addr *eloc, uint32_t elen, int inc)
{
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index c97b5a8d1..42eafb91f 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -921,7 +921,8 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
}
inode->i_data.a_ops = &udf_symlink_aops;
- inode->i_op = &udf_symlink_inode_operations;
+ inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
struct kernel_lb_addr eloc;
@@ -1344,8 +1345,3 @@ const struct inode_operations udf_dir_inode_operations = {
.rename = udf_rename,
.tmpfile = udf_tmpfile,
};
-const struct inode_operations udf_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
-};
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 81155b9b4..a522c15a0 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -179,7 +179,8 @@ static int __init init_inodecache(void)
udf_inode_cachep = kmem_cache_create("udf_inode_cache",
sizeof(struct udf_inode_info),
0, (SLAB_RECLAIM_ACCOUNT |
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD |
+ SLAB_ACCOUNT),
init_once);
if (!udf_inode_cachep)
return -ENOMEM;
@@ -278,17 +279,12 @@ static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
{
int i;
int nr_groups = bitmap->s_nr_groups;
- int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) *
- nr_groups);
for (i = 0; i < nr_groups; i++)
if (bitmap->s_block_bitmap[i])
brelse(bitmap->s_block_bitmap[i]);
- if (size <= PAGE_SIZE)
- kfree(bitmap);
- else
- vfree(bitmap);
+ kvfree(bitmap);
}
static void udf_free_partition(struct udf_part_map *map)
@@ -1586,6 +1582,13 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_
}
/*
+ * Maximum number of Terminating Descriptor redirections. The chosen number is
+ * arbitrary - just that we hopefully don't limit any real use of rewritten
+ * inode on write-once media but avoid looping for too long on corrupted media.
+ */
+#define UDF_MAX_TD_NESTING 64
+
+/*
* Process a main/reserve volume descriptor sequence.
* @block First block of first extent of the sequence.
* @lastblock Lastblock of first extent of the sequence.
@@ -1609,6 +1612,7 @@ static noinline int udf_process_sequence(
uint16_t ident;
long next_s = 0, next_e = 0;
int ret;
+ unsigned int indirections = 0;
memset(vds, 0, sizeof(struct udf_vds_record) * VDS_POS_LENGTH);
@@ -1679,6 +1683,12 @@ static noinline int udf_process_sequence(
}
break;
case TAG_IDENT_TD: /* ISO 13346 3/10.9 */
+ if (++indirections > UDF_MAX_TD_NESTING) {
+ udf_err(sb, "too many TDs (max %u supported)\n", UDF_MAX_TD_NESTING);
+ brelse(bh);
+ return -EIO;
+ }
+
vds[VDS_POS_TERMINATING_DESC].block = block;
if (next_e) {
block = next_s;
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index 862535b3b..8d6197730 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -107,7 +107,7 @@ static int udf_symlink_filler(struct file *file, struct page *page)
struct buffer_head *bh = NULL;
unsigned char *symlink;
int err;
- unsigned char *p = kmap(page);
+ unsigned char *p = page_address(page);
struct udf_inode_info *iinfo;
uint32_t pos;
@@ -141,7 +141,6 @@ static int udf_symlink_filler(struct file *file, struct page *page)
up_read(&iinfo->i_data_sem);
SetPageUptodate(page);
- kunmap(page);
unlock_page(page);
return 0;
@@ -149,7 +148,6 @@ out_unlock_inode:
up_read(&iinfo->i_data_sem);
SetPageError(page);
out_unmap:
- kunmap(page);
unlock_page(page);
return err;
}
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 47bb3f5ca..fa0044b6b 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -85,7 +85,6 @@ extern const struct inode_operations udf_dir_inode_operations;
extern const struct file_operations udf_dir_operations;
extern const struct inode_operations udf_file_inode_operations;
extern const struct file_operations udf_file_operations;
-extern const struct inode_operations udf_symlink_inode_operations;
extern const struct address_space_operations udf_aops;
extern const struct address_space_operations udf_adinicb_aops;
extern const struct address_space_operations udf_symlink_aops;
@@ -159,6 +158,10 @@ extern int udf_write_inode(struct inode *, struct writeback_control *wbc);
extern long udf_block_map(struct inode *, sector_t);
extern int8_t inode_bmap(struct inode *, sector_t, struct extent_position *,
struct kernel_lb_addr *, uint32_t *, sector_t *);
+extern int udf_setup_indirect_aext(struct inode *inode, int block,
+ struct extent_position *epos);
+extern int __udf_add_aext(struct inode *inode, struct extent_position *epos,
+ struct kernel_lb_addr *eloc, uint32_t elen, int inc);
extern int udf_add_aext(struct inode *, struct extent_position *,
struct kernel_lb_addr *, uint32_t, int);
extern void udf_write_aext(struct inode *, struct extent_position *,
diff --git a/fs/ufs/Makefile b/fs/ufs/Makefile
index 392db25c0..ec4a6b49f 100644
--- a/fs/ufs/Makefile
+++ b/fs/ufs/Makefile
@@ -5,5 +5,5 @@
obj-$(CONFIG_UFS_FS) += ufs.o
ufs-objs := balloc.o cylinder.o dir.o file.o ialloc.o inode.o \
- namei.o super.o symlink.o util.o
+ namei.o super.o util.o
ccflags-$(CONFIG_UFS_DEBUG) += -DDEBUG
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index a064cf44b..d897e169a 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -528,11 +528,12 @@ static void ufs_set_inode_ops(struct inode *inode)
inode->i_mapping->a_ops = &ufs_aops;
} else if (S_ISLNK(inode->i_mode)) {
if (!inode->i_blocks) {
- inode->i_op = &ufs_fast_symlink_inode_operations;
inode->i_link = (char *)UFS_I(inode)->i_u1.i_symlink;
+ inode->i_op = &simple_symlink_inode_operations;
} else {
- inode->i_op = &ufs_symlink_inode_operations;
inode->i_mapping->a_ops = &ufs_aops;
+ inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
}
} else
init_special_inode(inode, inode->i_mode,
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 479665543..acf4a3b61 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -123,14 +123,15 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) {
/* slow symlink */
- inode->i_op = &ufs_symlink_inode_operations;
+ inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &ufs_aops;
err = page_symlink(inode, symname, l);
if (err)
goto out_fail;
} else {
/* fast symlink */
- inode->i_op = &ufs_fast_symlink_inode_operations;
+ inode->i_op = &simple_symlink_inode_operations;
inode->i_link = (char *)UFS_I(inode)->i_u1.i_symlink;
memcpy(inode->i_link, symname, l);
inode->i_size = l-1;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index f6390eec0..442fd52eb 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1427,7 +1427,7 @@ static int __init init_inodecache(void)
ufs_inode_cachep = kmem_cache_create("ufs_inode_cache",
sizeof(struct ufs_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
if (ufs_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/ufs/symlink.c b/fs/ufs/symlink.c
deleted file mode 100644
index 874480bb4..000000000
--- a/fs/ufs/symlink.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * linux/fs/ufs/symlink.c
- *
- * Only fast symlinks left here - the rest is done by generic code. AV, 1999
- *
- * Copyright (C) 1998
- * Daniel Pirkl <daniel.pirkl@emai.cz>
- * Charles University, Faculty of Mathematics and Physics
- *
- * from
- *
- * linux/fs/ext2/symlink.c
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- * from
- *
- * linux/fs/minix/symlink.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- *
- * ext2 symlink handling code
- */
-
-#include "ufs_fs.h"
-#include "ufs.h"
-
-const struct inode_operations ufs_fast_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = simple_follow_link,
- .setattr = ufs_setattr,
-};
-
-const struct inode_operations ufs_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
- .setattr = ufs_setattr,
-};
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 7da4aca86..c87f4c3fa 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -136,10 +136,6 @@ extern __printf(3, 4)
void ufs_panic(struct super_block *, const char *, const char *, ...);
void ufs_mark_sb_dirty(struct super_block *sb);
-/* symlink.c */
-extern const struct inode_operations ufs_fast_symlink_inode_operations;
-extern const struct inode_operations ufs_symlink_inode_operations;
-
static inline struct ufs_sb_info *UFS_SB(struct super_block *sb)
{
return sb->s_fs_info;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 503117031..66cdb4461 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -287,6 +287,12 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
goto out;
/*
+ * We don't do userfault handling for the final child pid update.
+ */
+ if (current->flags & PF_EXITING)
+ goto out;
+
+ /*
* Check that we can return VM_FAULT_RETRY.
*
* NOTE: it should become possible to return VM_FAULT_RETRY
diff --git a/fs/utimes.c b/fs/utimes.c
index aa138d645..85c40f4f3 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -103,9 +103,9 @@ static int utimes_common(struct path *path, struct timespec *times)
}
}
retry_deleg:
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
error = notify_change(path->dentry, &newattrs, &delegated_inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);
if (!error)
diff --git a/fs/xattr.c b/fs/xattr.c
index 0c317c4fd..4861322e2 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -129,7 +129,7 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
if (error)
return error;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
error = security_inode_setxattr(dentry, name, value, size, flags);
if (error)
goto out;
@@ -137,7 +137,7 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
error = __vfs_setxattr_noperm(dentry, name, value, size, flags);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return error;
}
EXPORT_SYMBOL_GPL(vfs_setxattr);
@@ -207,26 +207,6 @@ vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value,
*xattr_value = value;
return error;
}
-EXPORT_SYMBOL_GPL(vfs_getxattr_alloc);
-
-/* Compare an extended attribute value with the given value */
-int vfs_xattr_cmp(struct dentry *dentry, const char *xattr_name,
- const char *value, size_t size, gfp_t flags)
-{
- char *xattr_value = NULL;
- int rc;
-
- rc = vfs_getxattr_alloc(dentry, xattr_name, &xattr_value, 0, flags);
- if (rc < 0)
- return rc;
-
- if ((rc != size) || (memcmp(xattr_value, value, rc) != 0))
- rc = -EINVAL;
- else
- rc = 0;
- kfree(xattr_value);
- return rc;
-}
ssize_t
vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size)
@@ -297,7 +277,7 @@ vfs_removexattr(struct dentry *dentry, const char *name)
if (error)
return error;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
error = security_inode_removexattr(dentry, name);
if (error)
goto out;
@@ -310,7 +290,7 @@ vfs_removexattr(struct dentry *dentry, const char *name)
}
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return error;
}
EXPORT_SYMBOL_GPL(vfs_removexattr);
@@ -325,7 +305,6 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
{
int error;
void *kvalue = NULL;
- void *vvalue = NULL; /* If non-NULL, we used vmalloc() */
char kname[XATTR_NAME_MAX + 1];
if (flags & ~(XATTR_CREATE|XATTR_REPLACE))
@@ -342,10 +321,9 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
return -E2BIG;
kvalue = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
if (!kvalue) {
- vvalue = vmalloc(size);
- if (!vvalue)
+ kvalue = vmalloc(size);
+ if (!kvalue)
return -ENOMEM;
- kvalue = vvalue;
}
if (copy_from_user(kvalue, value, size)) {
error = -EFAULT;
@@ -358,10 +336,8 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
error = vfs_setxattr(d, kname, kvalue, size, flags);
out:
- if (vvalue)
- vfree(vvalue);
- else
- kfree(kvalue);
+ kvfree(kvalue);
+
return error;
}
@@ -429,7 +405,6 @@ getxattr(struct dentry *d, const char __user *name, void __user *value,
{
ssize_t error;
void *kvalue = NULL;
- void *vvalue = NULL;
char kname[XATTR_NAME_MAX + 1];
error = strncpy_from_user(kname, name, sizeof(kname));
@@ -443,10 +418,9 @@ getxattr(struct dentry *d, const char __user *name, void __user *value,
size = XATTR_SIZE_MAX;
kvalue = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
if (!kvalue) {
- vvalue = vmalloc(size);
- if (!vvalue)
+ kvalue = vmalloc(size);
+ if (!kvalue)
return -ENOMEM;
- kvalue = vvalue;
}
}
@@ -462,10 +436,9 @@ getxattr(struct dentry *d, const char __user *name, void __user *value,
than XATTR_SIZE_MAX bytes. Not possible. */
error = -E2BIG;
}
- if (vvalue)
- vfree(vvalue);
- else
- kfree(kvalue);
+
+ kvfree(kvalue);
+
return error;
}
@@ -522,17 +495,15 @@ listxattr(struct dentry *d, char __user *list, size_t size)
{
ssize_t error;
char *klist = NULL;
- char *vlist = NULL; /* If non-NULL, we used vmalloc() */
if (size) {
if (size > XATTR_LIST_MAX)
size = XATTR_LIST_MAX;
klist = kmalloc(size, __GFP_NOWARN | GFP_KERNEL);
if (!klist) {
- vlist = vmalloc(size);
- if (!vlist)
+ klist = vmalloc(size);
+ if (!klist)
return -ENOMEM;
- klist = vlist;
}
}
@@ -545,10 +516,9 @@ listxattr(struct dentry *d, char __user *list, size_t size)
than XATTR_LIST_MAX bytes. Not possible. */
error = -E2BIG;
}
- if (vlist)
- vfree(vlist);
- else
- kfree(klist);
+
+ kvfree(klist);
+
return error;
}
@@ -701,13 +671,20 @@ xattr_resolve_name(const struct xattr_handler **handlers, const char **name)
return NULL;
for_each_xattr_handler(handlers, handler) {
- const char *n = strcmp_prefix(*name, handler->prefix);
+ const char *n;
+
+ n = strcmp_prefix(*name, xattr_prefix(handler));
if (n) {
+ if (!handler->prefix ^ !*n) {
+ if (*n)
+ continue;
+ return ERR_PTR(-EINVAL);
+ }
*name = n;
- break;
+ return handler;
}
}
- return handler;
+ return ERR_PTR(-EOPNOTSUPP);
}
/*
@@ -719,8 +696,8 @@ generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t s
const struct xattr_handler *handler;
handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
- if (!handler)
- return -EOPNOTSUPP;
+ if (IS_ERR(handler))
+ return PTR_ERR(handler);
return handler->get(handler, dentry, name, buffer, size);
}
@@ -736,19 +713,25 @@ generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
if (!buffer) {
for_each_xattr_handler(handlers, handler) {
- size += handler->list(handler, dentry, NULL, 0,
- NULL, 0);
+ if (!handler->name ||
+ (handler->list && !handler->list(dentry)))
+ continue;
+ size += strlen(handler->name) + 1;
}
} else {
char *buf = buffer;
+ size_t len;
for_each_xattr_handler(handlers, handler) {
- size = handler->list(handler, dentry, buf, buffer_size,
- NULL, 0);
- if (size > buffer_size)
+ if (!handler->name ||
+ (handler->list && !handler->list(dentry)))
+ continue;
+ len = strlen(handler->name);
+ if (len + 1 > buffer_size)
return -ERANGE;
- buf += size;
- buffer_size -= size;
+ memcpy(buf, handler->name, len + 1);
+ buf += len + 1;
+ buffer_size -= len + 1;
}
size = buf - buffer;
}
@@ -766,8 +749,8 @@ generic_setxattr(struct dentry *dentry, const char *name, const void *value, siz
if (size == 0)
value = ""; /* empty EA, do not remove */
handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
- if (!handler)
- return -EOPNOTSUPP;
+ if (IS_ERR(handler))
+ return PTR_ERR(handler);
return handler->set(handler, dentry, name, value, size, flags);
}
@@ -781,8 +764,8 @@ generic_removexattr(struct dentry *dentry, const char *name)
const struct xattr_handler *handler;
handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
- if (!handler)
- return -EOPNOTSUPP;
+ if (IS_ERR(handler))
+ return PTR_ERR(handler);
return handler->set(handler, dentry, name, NULL, 0, XATTR_REPLACE);
}
@@ -809,7 +792,7 @@ EXPORT_SYMBOL(generic_removexattr);
const char *xattr_full_name(const struct xattr_handler *handler,
const char *name)
{
- size_t prefix_len = strlen(handler->prefix);
+ size_t prefix_len = strlen(xattr_prefix(handler));
return name - prefix_len;
}
@@ -864,8 +847,22 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
return ret;
}
-static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
- const void *value, size_t size, int flags)
+/**
+ * simple_xattr_set - xattr SET operation for in-memory/pseudo filesystems
+ * @xattrs: target simple_xattr list
+ * @name: name of the extended attribute
+ * @value: value of the xattr. If %NULL, will remove the attribute.
+ * @size: size of the new xattr
+ * @flags: %XATTR_{CREATE|REPLACE}
+ *
+ * If %XATTR_CREATE is set, the xattr shouldn't exist already; otherwise fails
+ * with -EEXIST. If %XATTR_REPLACE is set, the xattr should exist;
+ * otherwise, fails with -ENODATA.
+ *
+ * Returns 0 on success, -errno on failure.
+ */
+int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
+ const void *value, size_t size, int flags)
{
struct simple_xattr *xattr;
struct simple_xattr *new_xattr = NULL;
@@ -915,73 +912,64 @@ out:
}
-/**
- * simple_xattr_set - xattr SET operation for in-memory/pseudo filesystems
- * @xattrs: target simple_xattr list
- * @name: name of the new extended attribute
- * @value: value of the new xattr. If %NULL, will remove the attribute
- * @size: size of the new xattr
- * @flags: %XATTR_{CREATE|REPLACE}
- *
- * %XATTR_CREATE is set, the xattr shouldn't exist already; otherwise fails
- * with -EEXIST. If %XATTR_REPLACE is set, the xattr should exist;
- * otherwise, fails with -ENODATA.
- *
- * Returns 0 on success, -errno on failure.
- */
-int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
- const void *value, size_t size, int flags)
-{
- if (size == 0)
- value = ""; /* empty EA, do not remove */
- return __simple_xattr_set(xattrs, name, value, size, flags);
-}
-
-/*
- * xattr REMOVE operation for in-memory/pseudo filesystems
- */
-int simple_xattr_remove(struct simple_xattrs *xattrs, const char *name)
+static bool xattr_is_trusted(const char *name)
{
- return __simple_xattr_set(xattrs, name, NULL, 0, XATTR_REPLACE);
+ return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
}
-static bool xattr_is_trusted(const char *name)
+static int xattr_list_one(char **buffer, ssize_t *remaining_size,
+ const char *name)
{
- return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
+ size_t len = strlen(name) + 1;
+ if (*buffer) {
+ if (*remaining_size < len)
+ return -ERANGE;
+ memcpy(*buffer, name, len);
+ *buffer += len;
+ }
+ *remaining_size -= len;
+ return 0;
}
/*
* xattr LIST operation for in-memory/pseudo filesystems
*/
-ssize_t simple_xattr_list(struct simple_xattrs *xattrs, char *buffer,
- size_t size)
+ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
+ char *buffer, size_t size)
{
bool trusted = capable(CAP_SYS_ADMIN);
struct simple_xattr *xattr;
- size_t used = 0;
+ ssize_t remaining_size = size;
+ int err = 0;
+
+#ifdef CONFIG_FS_POSIX_ACL
+ if (inode->i_acl) {
+ err = xattr_list_one(&buffer, &remaining_size,
+ XATTR_NAME_POSIX_ACL_ACCESS);
+ if (err)
+ return err;
+ }
+ if (inode->i_default_acl) {
+ err = xattr_list_one(&buffer, &remaining_size,
+ XATTR_NAME_POSIX_ACL_DEFAULT);
+ if (err)
+ return err;
+ }
+#endif
spin_lock(&xattrs->lock);
list_for_each_entry(xattr, &xattrs->head, list) {
- size_t len;
-
/* skip "trusted." attributes for unprivileged callers */
if (!trusted && xattr_is_trusted(xattr->name))
continue;
- len = strlen(xattr->name) + 1;
- used += len;
- if (buffer) {
- if (size < used) {
- used = -ERANGE;
- break;
- }
- memcpy(buffer, xattr->name, len);
- buffer += len;
- }
+ err = xattr_list_one(&buffer, &remaining_size, xattr->name);
+ if (err)
+ break;
}
spin_unlock(&xattrs->lock);
- return used;
+ return err ? err : size - remaining_size;
}
/*
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index cc6b768fc..d1c66e465 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -84,6 +84,7 @@ kmem_zalloc(size_t size, xfs_km_flags_t flags)
#define KM_ZONE_HWALIGN SLAB_HWCACHE_ALIGN
#define KM_ZONE_RECLAIM SLAB_RECLAIM_ACCOUNT
#define KM_ZONE_SPREAD SLAB_MEM_SPREAD
+#define KM_ZONE_ACCOUNT SLAB_ACCOUNT
#define kmem_zone kmem_cache
#define kmem_zone_t struct kmem_cache
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 3479294c1..a708e38b4 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -535,6 +535,7 @@ xfs_agfl_write_verify(
}
const struct xfs_buf_ops xfs_agfl_buf_ops = {
+ .name = "xfs_agfl",
.verify_read = xfs_agfl_read_verify,
.verify_write = xfs_agfl_write_verify,
};
@@ -1926,7 +1927,7 @@ xfs_alloc_space_available(
* Decide whether to use this allocation group for this allocation.
* If so, fix up the btree freelist's size.
*/
-STATIC int /* error */
+int /* error */
xfs_alloc_fix_freelist(
struct xfs_alloc_arg *args, /* allocation argument structure */
int flags) /* XFS_ALLOC_FLAG_... */
@@ -2339,6 +2340,7 @@ xfs_agf_write_verify(
}
const struct xfs_buf_ops xfs_agf_buf_ops = {
+ .name = "xfs_agf",
.verify_read = xfs_agf_read_verify,
.verify_write = xfs_agf_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 0ecde4d5c..135eb3d24 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -235,5 +235,6 @@ xfs_alloc_get_rec(
int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
+int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags);
#endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 90de071dd..444626ddb 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -293,14 +293,7 @@ xfs_allocbt_verify(
level = be16_to_cpu(block->bb_level);
switch (block->bb_magic) {
case cpu_to_be32(XFS_ABTB_CRC_MAGIC):
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- return false;
- if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
- return false;
- if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
- return false;
- if (pag &&
- be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ if (!xfs_btree_sblock_v5hdr_verify(bp))
return false;
/* fall through */
case cpu_to_be32(XFS_ABTB_MAGIC):
@@ -311,14 +304,7 @@ xfs_allocbt_verify(
return false;
break;
case cpu_to_be32(XFS_ABTC_CRC_MAGIC):
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- return false;
- if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
- return false;
- if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
- return false;
- if (pag &&
- be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ if (!xfs_btree_sblock_v5hdr_verify(bp))
return false;
/* fall through */
case cpu_to_be32(XFS_ABTC_MAGIC):
@@ -332,21 +318,7 @@ xfs_allocbt_verify(
return false;
}
- /* numrecs verification */
- if (be16_to_cpu(block->bb_numrecs) > mp->m_alloc_mxr[level != 0])
- return false;
-
- /* sibling pointer verification */
- if (!block->bb_u.s.bb_leftsib ||
- (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
- block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
- return false;
- if (!block->bb_u.s.bb_rightsib ||
- (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
- block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
- return false;
-
- return true;
+ return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]);
}
static void
@@ -379,6 +351,7 @@ xfs_allocbt_write_verify(
}
const struct xfs_buf_ops xfs_allocbt_buf_ops = {
+ .name = "xfs_allocbt",
.verify_read = xfs_allocbt_read_verify,
.verify_write = xfs_allocbt_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index f949818fa..fa3b948ef 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -207,7 +207,7 @@ xfs_attr_set(
struct xfs_trans_res tres;
xfs_fsblock_t firstblock;
int rsvd = (flags & ATTR_ROOT) != 0;
- int error, err2, committed, local;
+ int error, err2, local;
XFS_STATS_INC(mp, xs_attr_set);
@@ -334,25 +334,15 @@ xfs_attr_set(
*/
xfs_bmap_init(args.flist, args.firstblock);
error = xfs_attr_shortform_to_leaf(&args);
- if (!error) {
- error = xfs_bmap_finish(&args.trans, args.flist,
- &committed);
- }
+ if (!error)
+ error = xfs_bmap_finish(&args.trans, args.flist, dp);
if (error) {
- ASSERT(committed);
args.trans = NULL;
xfs_bmap_cancel(&flist);
goto out;
}
/*
- * bmap_finish() may have committed the last trans and started
- * a new one. We need the inode to be in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args.trans, dp, 0);
-
- /*
* Commit the leaf transformation. We'll need another (linked)
* transaction to add the new attribute to the leaf.
*/
@@ -568,7 +558,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
{
xfs_inode_t *dp;
struct xfs_buf *bp;
- int retval, error, committed, forkoff;
+ int retval, error, forkoff;
trace_xfs_attr_leaf_addname(args);
@@ -628,25 +618,15 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
*/
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_attr3_leaf_to_node(args);
- if (!error) {
- error = xfs_bmap_finish(&args->trans, args->flist,
- &committed);
- }
+ if (!error)
+ error = xfs_bmap_finish(&args->trans, args->flist, dp);
if (error) {
- ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
return error;
}
/*
- * bmap_finish() may have committed the last trans and started
- * a new one. We need the inode to be in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args->trans, dp, 0);
-
- /*
* Commit the current trans (including the inode) and start
* a new one.
*/
@@ -729,25 +709,14 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
/* bp is gone due to xfs_da_shrink_inode */
- if (!error) {
+ if (!error)
error = xfs_bmap_finish(&args->trans,
- args->flist,
- &committed);
- }
+ args->flist, dp);
if (error) {
- ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
return error;
}
-
- /*
- * bmap_finish() may have committed the last trans
- * and started a new one. We need the inode to be
- * in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args->trans, dp, 0);
}
/*
@@ -775,7 +744,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
{
xfs_inode_t *dp;
struct xfs_buf *bp;
- int error, committed, forkoff;
+ int error, forkoff;
trace_xfs_attr_leaf_removename(args);
@@ -803,23 +772,13 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
/* bp is gone due to xfs_da_shrink_inode */
- if (!error) {
- error = xfs_bmap_finish(&args->trans, args->flist,
- &committed);
- }
+ if (!error)
+ error = xfs_bmap_finish(&args->trans, args->flist, dp);
if (error) {
- ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
return error;
}
-
- /*
- * bmap_finish() may have committed the last trans and started
- * a new one. We need the inode to be in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args->trans, dp, 0);
}
return 0;
}
@@ -877,7 +836,7 @@ xfs_attr_node_addname(xfs_da_args_t *args)
xfs_da_state_blk_t *blk;
xfs_inode_t *dp;
xfs_mount_t *mp;
- int committed, retval, error;
+ int retval, error;
trace_xfs_attr_node_addname(args);
@@ -938,27 +897,16 @@ restart:
state = NULL;
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_attr3_leaf_to_node(args);
- if (!error) {
+ if (!error)
error = xfs_bmap_finish(&args->trans,
- args->flist,
- &committed);
- }
+ args->flist, dp);
if (error) {
- ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
goto out;
}
/*
- * bmap_finish() may have committed the last trans
- * and started a new one. We need the inode to be
- * in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args->trans, dp, 0);
-
- /*
* Commit the node conversion and start the next
* trans in the chain.
*/
@@ -977,23 +925,13 @@ restart:
*/
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_da3_split(state);
- if (!error) {
- error = xfs_bmap_finish(&args->trans, args->flist,
- &committed);
- }
+ if (!error)
+ error = xfs_bmap_finish(&args->trans, args->flist, dp);
if (error) {
- ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
goto out;
}
-
- /*
- * bmap_finish() may have committed the last trans and started
- * a new one. We need the inode to be in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args->trans, dp, 0);
} else {
/*
* Addition succeeded, update Btree hashvals.
@@ -1086,25 +1024,14 @@ restart:
if (retval && (state->path.active > 1)) {
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_da3_join(state);
- if (!error) {
+ if (!error)
error = xfs_bmap_finish(&args->trans,
- args->flist,
- &committed);
- }
+ args->flist, dp);
if (error) {
- ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
goto out;
}
-
- /*
- * bmap_finish() may have committed the last trans
- * and started a new one. We need the inode to be
- * in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args->trans, dp, 0);
}
/*
@@ -1146,7 +1073,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
xfs_da_state_blk_t *blk;
xfs_inode_t *dp;
struct xfs_buf *bp;
- int retval, error, committed, forkoff;
+ int retval, error, forkoff;
trace_xfs_attr_node_removename(args);
@@ -1220,24 +1147,13 @@ xfs_attr_node_removename(xfs_da_args_t *args)
if (retval && (state->path.active > 1)) {
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_da3_join(state);
- if (!error) {
- error = xfs_bmap_finish(&args->trans, args->flist,
- &committed);
- }
+ if (!error)
+ error = xfs_bmap_finish(&args->trans, args->flist, dp);
if (error) {
- ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
goto out;
}
-
- /*
- * bmap_finish() may have committed the last trans and started
- * a new one. We need the inode to be in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args->trans, dp, 0);
-
/*
* Commit the Btree join operation and start a new trans.
*/
@@ -1265,25 +1181,14 @@ xfs_attr_node_removename(xfs_da_args_t *args)
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
/* bp is gone due to xfs_da_shrink_inode */
- if (!error) {
+ if (!error)
error = xfs_bmap_finish(&args->trans,
- args->flist,
- &committed);
- }
+ args->flist, dp);
if (error) {
- ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
goto out;
}
-
- /*
- * bmap_finish() may have committed the last trans
- * and started a new one. We need the inode to be
- * in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args->trans, dp, 0);
} else
xfs_trans_brelse(args->trans, bp);
}
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index aa187f7ba..01a5ecfed 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -328,6 +328,7 @@ xfs_attr3_leaf_read_verify(
}
const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = {
+ .name = "xfs_attr3_leaf",
.verify_read = xfs_attr3_leaf_read_verify,
.verify_write = xfs_attr3_leaf_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 5ab95ffa4..a572532a5 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -201,6 +201,7 @@ xfs_attr3_rmt_write_verify(
}
const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
+ .name = "xfs_attr3_rmt",
.verify_read = xfs_attr3_rmt_read_verify,
.verify_write = xfs_attr3_rmt_write_verify,
};
@@ -447,8 +448,6 @@ xfs_attr_rmtval_set(
* Roll through the "value", allocating blocks on disk as required.
*/
while (blkcnt > 0) {
- int committed;
-
/*
* Allocate a single extent, up to the size of the value.
*
@@ -466,24 +465,14 @@ xfs_attr_rmtval_set(
error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock,
args->total, &map, &nmap, args->flist);
- if (!error) {
- error = xfs_bmap_finish(&args->trans, args->flist,
- &committed);
- }
+ if (!error)
+ error = xfs_bmap_finish(&args->trans, args->flist, dp);
if (error) {
- ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
return error;
}
- /*
- * bmap_finish() may have committed the last trans and started
- * a new one. We need the inode to be in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args->trans, dp, 0);
-
ASSERT(nmap == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
(map.br_startblock != HOLESTARTBLOCK));
@@ -614,31 +603,20 @@ xfs_attr_rmtval_remove(
blkcnt = args->rmtblkcnt;
done = 0;
while (!done) {
- int committed;
-
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
XFS_BMAPI_ATTRFORK, 1, args->firstblock,
args->flist, &done);
- if (!error) {
+ if (!error)
error = xfs_bmap_finish(&args->trans, args->flist,
- &committed);
- }
+ args->dp);
if (error) {
- ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
return error;
}
/*
- * bmap_finish() may have committed the last trans and started
- * a new one. We need the inode to be in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args->trans, args->dp, 0);
-
- /*
* Close out trans and start the next one in the chain.
*/
error = xfs_trans_roll(&args->trans, args->dp);
diff --git a/fs/xfs/libxfs/xfs_bit.c b/fs/xfs/libxfs/xfs_bit.c
index 0e8885a59..0a94cce5e 100644
--- a/fs/xfs/libxfs/xfs_bit.c
+++ b/fs/xfs/libxfs/xfs_bit.c
@@ -32,13 +32,13 @@ int
xfs_bitmap_empty(uint *map, uint size)
{
uint i;
- uint ret = 0;
for (i = 0; i < size; i++) {
- ret |= map[i];
+ if (map[i] != 0)
+ return 0;
}
- return (ret == 0);
+ return 1;
}
/*
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 119c2422a..ef00156f4 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -325,9 +325,11 @@ xfs_check_block(
/*
* Check that the extents for the inode ip are in the right order in all
- * btree leaves.
+ * btree leaves. This becomes prohibitively expensive for large extent count
+ * files, so don't bother with inodes that have more than 10,000 extents in
+ * them. The btree record ordering checks will still be done, so for such large
+ * bmapbt constructs that is going to catch most corruptions.
*/
-
STATIC void
xfs_bmap_check_leaf_extents(
xfs_btree_cur_t *cur, /* btree cursor or null */
@@ -352,6 +354,10 @@ xfs_bmap_check_leaf_extents(
return;
}
+ /* skip large extent count inodes */
+ if (ip->i_d.di_nextents > 10000)
+ return;
+
bno = NULLFSBLOCK;
mp = ip->i_mount;
ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -1111,7 +1117,6 @@ xfs_bmap_add_attrfork(
xfs_trans_t *tp; /* transaction pointer */
int blks; /* space reservation */
int version = 1; /* superblock attr version */
- int committed; /* xaction was committed */
int logflags; /* logging flags */
int error; /* error return value */
@@ -1214,7 +1219,7 @@ xfs_bmap_add_attrfork(
xfs_log_sb(tp);
}
- error = xfs_bmap_finish(&tp, &flist, &committed);
+ error = xfs_bmap_finish(&tp, &flist, NULL);
if (error)
goto bmap_cancel;
error = xfs_trans_commit(tp);
@@ -1723,10 +1728,11 @@ xfs_bmap_add_extent_delay_real(
xfs_filblks_t temp=0; /* value for da_new calculations */
xfs_filblks_t temp2=0;/* value for da_new calculations */
int tmp_rval; /* partial logging flags */
+ int whichfork = XFS_DATA_FORK;
struct xfs_mount *mp;
- mp = bma->tp ? bma->tp->t_mountp : NULL;
- ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK);
+ mp = bma->ip->i_mount;
+ ifp = XFS_IFORK_PTR(bma->ip, whichfork);
ASSERT(bma->idx >= 0);
ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
@@ -1785,7 +1791,7 @@ xfs_bmap_add_extent_delay_real(
* Don't set contiguous if the combined extent would be too large.
* Also check for all-three-contiguous being too large.
*/
- if (bma->idx < bma->ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
+ if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
state |= BMAP_RIGHT_VALID;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
@@ -2016,10 +2022,10 @@ xfs_bmap_add_extent_delay_real(
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
- if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+ if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist,
- &bma->cur, 1, &tmp_rval, XFS_DATA_FORK);
+ &bma->cur, 1, &tmp_rval, whichfork);
rval |= tmp_rval;
if (error)
goto done;
@@ -2100,10 +2106,10 @@ xfs_bmap_add_extent_delay_real(
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
- if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+ if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist, &bma->cur, 1,
- &tmp_rval, XFS_DATA_FORK);
+ &tmp_rval, whichfork);
rval |= tmp_rval;
if (error)
goto done;
@@ -2169,10 +2175,10 @@ xfs_bmap_add_extent_delay_real(
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
- if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+ if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist, &bma->cur,
- 1, &tmp_rval, XFS_DATA_FORK);
+ 1, &tmp_rval, whichfork);
rval |= tmp_rval;
if (error)
goto done;
@@ -2215,13 +2221,13 @@ xfs_bmap_add_extent_delay_real(
}
/* convert to a btree if necessary */
- if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+ if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
int tmp_logflags; /* partial log flag return val */
ASSERT(bma->cur == NULL);
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist, &bma->cur,
- da_old > 0, &tmp_logflags, XFS_DATA_FORK);
+ da_old > 0, &tmp_logflags, whichfork);
bma->logflags |= tmp_logflags;
if (error)
goto done;
@@ -2242,7 +2248,7 @@ xfs_bmap_add_extent_delay_real(
if (bma->cur)
bma->cur->bc_private.b.allocated = 0;
- xfs_bmap_check_leaf_extents(bma->cur, bma->ip, XFS_DATA_FORK);
+ xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
done:
bma->logflags |= rval;
return error;
@@ -2939,7 +2945,7 @@ xfs_bmap_add_extent_hole_real(
int state; /* state bits, accessed thru macros */
struct xfs_mount *mp;
- mp = bma->tp ? bma->tp->t_mountp : NULL;
+ mp = bma->ip->i_mount;
ifp = XFS_IFORK_PTR(bma->ip, whichfork);
ASSERT(bma->idx >= 0);
@@ -5950,7 +5956,6 @@ xfs_bmap_split_extent(
struct xfs_trans *tp;
struct xfs_bmap_free free_list;
xfs_fsblock_t firstfsb;
- int committed;
int error;
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
@@ -5971,7 +5976,7 @@ xfs_bmap_split_extent(
if (error)
goto out;
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto out;
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index a160f8a5a..423a34e83 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -195,7 +195,7 @@ void xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len,
struct xfs_bmap_free *flist, struct xfs_mount *mp);
void xfs_bmap_cancel(struct xfs_bmap_free *flist);
int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
- int *committed);
+ struct xfs_inode *ip);
void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork);
int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 6b0cf6546..1637c37bf 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -720,6 +720,7 @@ xfs_bmbt_write_verify(
}
const struct xfs_buf_ops xfs_bmbt_buf_ops = {
+ .name = "xfs_bmbt",
.verify_read = xfs_bmbt_read_verify,
.verify_write = xfs_bmbt_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index af1bbee55..a0eb18ce3 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4080,3 +4080,61 @@ xfs_btree_change_owner(
return 0;
}
+
+/**
+ * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
+ * btree block
+ *
+ * @bp: buffer containing the btree block
+ * Returns true if the filesystem has CRCs enabled and the block's UUID, block
+ * number and (when the buffer has a perag) owner match; false otherwise.
+ */
+bool
+xfs_btree_sblock_v5hdr_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_perag *pag = bp->b_pag;
+
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return false;
+ if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
+ return false;
+ if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
+ return false;
+ if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ return false;
+ return true;
+}
+
+/**
+ * xfs_btree_sblock_verify() -- verify a short-format btree block
+ *
+ * @bp: buffer containing the btree block
+ * @max_recs: maximum records allowed in this btree node
+ */
+bool
+xfs_btree_sblock_verify(
+ struct xfs_buf *bp,
+ unsigned int max_recs)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+
+ /* numrecs verification */
+ if (be16_to_cpu(block->bb_numrecs) > max_recs)
+ return false;
+
+ /* sibling pointer verification */
+ if (!block->bb_u.s.bb_leftsib ||
+ (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
+ block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
+ return false;
+ if (!block->bb_u.s.bb_rightsib ||
+ (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
+ block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
+ return false;
+
+ return true;
+}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 992dec063..2e874be70 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -472,4 +472,7 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
#define XFS_BTREE_TRACE_ARGR(c, r)
#define XFS_BTREE_TRACE_CURSOR(c, t)
+bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp);
+bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs);
+
#endif /* __XFS_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index e89a0f8f8..097bf7717 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -245,6 +245,7 @@ xfs_da3_node_read_verify(
}
const struct xfs_buf_ops xfs_da3_node_buf_ops = {
+ .name = "xfs_da3_node",
.verify_read = xfs_da3_node_read_verify,
.verify_write = xfs_da3_node_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 9c10e2b8c..aa17cb788 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -123,6 +123,7 @@ xfs_dir3_block_write_verify(
}
const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
+ .name = "xfs_dir3_block",
.verify_read = xfs_dir3_block_read_verify,
.verify_write = xfs_dir3_block_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index af71a84f3..725fc7841 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -305,11 +305,13 @@ xfs_dir3_data_write_verify(
}
const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
+ .name = "xfs_dir3_data",
.verify_read = xfs_dir3_data_read_verify,
.verify_write = xfs_dir3_data_write_verify,
};
static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = {
+ .name = "xfs_dir3_data_reada",
.verify_read = xfs_dir3_data_reada_verify,
.verify_write = xfs_dir3_data_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index 3923e1f94..b887fb2a2 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -245,11 +245,13 @@ xfs_dir3_leafn_write_verify(
}
const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = {
+ .name = "xfs_dir3_leaf1",
.verify_read = xfs_dir3_leaf1_read_verify,
.verify_write = xfs_dir3_leaf1_write_verify,
};
const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = {
+ .name = "xfs_dir3_leafn",
.verify_read = xfs_dir3_leafn_read_verify,
.verify_write = xfs_dir3_leafn_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 70b0cb2fd..63ee03db7 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -150,6 +150,7 @@ xfs_dir3_free_write_verify(
}
const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
+ .name = "xfs_dir3_free",
.verify_read = xfs_dir3_free_read_verify,
.verify_write = xfs_dir3_free_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index 5331b7f04..3cc3cf767 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -54,7 +54,7 @@ xfs_dqcheck(
xfs_dqid_t id,
uint type, /* used only when IO_dorepair is true */
uint flags,
- char *str)
+ const char *str)
{
xfs_dqblk_t *d = (xfs_dqblk_t *)ddq;
int errs = 0;
@@ -207,7 +207,8 @@ xfs_dquot_buf_verify_crc(
STATIC bool
xfs_dquot_buf_verify(
struct xfs_mount *mp,
- struct xfs_buf *bp)
+ struct xfs_buf *bp,
+ int warn)
{
struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
xfs_dqid_t id = 0;
@@ -240,8 +241,7 @@ xfs_dquot_buf_verify(
if (i == 0)
id = be32_to_cpu(ddq->d_id);
- error = xfs_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN,
- "xfs_dquot_buf_verify");
+ error = xfs_dqcheck(mp, ddq, id + i, 0, warn, __func__);
if (error)
return false;
}
@@ -256,7 +256,7 @@ xfs_dquot_buf_read_verify(
if (!xfs_dquot_buf_verify_crc(mp, bp))
xfs_buf_ioerror(bp, -EFSBADCRC);
- else if (!xfs_dquot_buf_verify(mp, bp))
+ else if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN))
xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error)
@@ -264,6 +264,25 @@ xfs_dquot_buf_read_verify(
}
/*
+ * readahead errors are silent and simply leave the buffer as !done so a real
+ * read will then be run with the xfs_dquot_buf_ops verifier. See
+ * xfs_inode_buf_verify() for why we use EIO and ~XBF_DONE here rather than
+ * reporting the failure.
+ */
+static void
+xfs_dquot_buf_readahead_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+
+ if (!xfs_dquot_buf_verify_crc(mp, bp) ||
+ !xfs_dquot_buf_verify(mp, bp, 0)) {
+ xfs_buf_ioerror(bp, -EIO);
+ bp->b_flags &= ~XBF_DONE;
+ }
+}
+
+/*
* we don't calculate the CRC here as that is done when the dquot is flushed to
* the buffer after the update is done. This ensures that the dquot in the
* buffer always has an up-to-date CRC value.
@@ -274,7 +293,7 @@ xfs_dquot_buf_write_verify(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- if (!xfs_dquot_buf_verify(mp, bp)) {
+ if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) {
xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
@@ -282,7 +301,13 @@ xfs_dquot_buf_write_verify(
}
const struct xfs_buf_ops xfs_dquot_buf_ops = {
+ .name = "xfs_dquot",
.verify_read = xfs_dquot_buf_read_verify,
.verify_write = xfs_dquot_buf_write_verify,
};
+const struct xfs_buf_ops xfs_dquot_buf_ra_ops = {
+ .name = "xfs_dquot_ra",
+ .verify_read = xfs_dquot_buf_readahead_verify,
+ .verify_write = xfs_dquot_buf_write_verify,
+};
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index e2536bb1c..dc97eb21a 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -984,8 +984,6 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
/*
* Values for di_flags
- * There should be a one-to-one correspondence between these flags and the
- * XFS_XFLAG_s.
*/
#define XFS_DIFLAG_REALTIME_BIT 0 /* file's blocks come from rt area */
#define XFS_DIFLAG_PREALLOC_BIT 1 /* file space has been preallocated */
@@ -1026,6 +1024,15 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM)
/*
+ * Values for di_flags2. These start by being exposed to userspace in the upper
+ * 16 bits of the XFS_XFLAG_s range.
+ */
+#define XFS_DIFLAG2_DAX_BIT 0 /* use DAX for this inode */
+#define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT)
+
+#define XFS_DIFLAG2_ANY (XFS_DIFLAG2_DAX)
+
+/*
* Inode number format:
* low inopblog bits - offset in block
* next agblklog bits - block number in ag
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index b2b73a998..fffe3d01b 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -36,40 +36,6 @@ struct dioattr {
#endif
/*
- * Structure for XFS_IOC_FSGETXATTR[A] and XFS_IOC_FSSETXATTR.
- */
-#ifndef HAVE_FSXATTR
-struct fsxattr {
- __u32 fsx_xflags; /* xflags field value (get/set) */
- __u32 fsx_extsize; /* extsize field value (get/set)*/
- __u32 fsx_nextents; /* nextents field value (get) */
- __u32 fsx_projid; /* project identifier (get/set) */
- unsigned char fsx_pad[12];
-};
-#endif
-
-/*
- * Flags for the bs_xflags/fsx_xflags field
- * There should be a one-to-one correspondence between these flags and the
- * XFS_DIFLAG_s.
- */
-#define XFS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */
-#define XFS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */
-#define XFS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */
-#define XFS_XFLAG_APPEND 0x00000010 /* all writes append */
-#define XFS_XFLAG_SYNC 0x00000020 /* all writes synchronous */
-#define XFS_XFLAG_NOATIME 0x00000040 /* do not update access time */
-#define XFS_XFLAG_NODUMP 0x00000080 /* do not include in backups */
-#define XFS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */
-#define XFS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */
-#define XFS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
-#define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
-#define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
-#define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
-#define XFS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
-#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
-
-/*
* Structure for XFS_IOC_GETBMAP.
* On input, fill in bmv_offset and bmv_length of the first structure
* to indicate the area of interest in the file, and bmv_entries with
@@ -514,8 +480,8 @@ typedef struct xfs_swapext
#define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64)
#define XFS_IOC_FREESP _IOW ('X', 11, struct xfs_flock64)
#define XFS_IOC_DIOINFO _IOR ('X', 30, struct dioattr)
-#define XFS_IOC_FSGETXATTR _IOR ('X', 31, struct fsxattr)
-#define XFS_IOC_FSSETXATTR _IOW ('X', 32, struct fsxattr)
+#define XFS_IOC_FSGETXATTR FS_IOC_FSGETXATTR
+#define XFS_IOC_FSSETXATTR FS_IOC_FSSETXATTR
#define XFS_IOC_ALLOCSP64 _IOW ('X', 36, struct xfs_flock64)
#define XFS_IOC_FREESP64 _IOW ('X', 37, struct xfs_flock64)
#define XFS_IOC_GETBMAP _IOWR('X', 38, struct getbmap)
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 70c1db99f..66d702e6b 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2572,6 +2572,7 @@ xfs_agi_write_verify(
}
const struct xfs_buf_ops xfs_agi_buf_ops = {
+ .name = "xfs_agi",
.verify_read = xfs_agi_read_verify,
.verify_write = xfs_agi_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index f39b285be..c679f3c05 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -221,7 +221,6 @@ xfs_inobt_verify(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
- struct xfs_perag *pag = bp->b_pag;
unsigned int level;
/*
@@ -237,14 +236,7 @@ xfs_inobt_verify(
switch (block->bb_magic) {
case cpu_to_be32(XFS_IBT_CRC_MAGIC):
case cpu_to_be32(XFS_FIBT_CRC_MAGIC):
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- return false;
- if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
- return false;
- if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
- return false;
- if (pag &&
- be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ if (!xfs_btree_sblock_v5hdr_verify(bp))
return false;
/* fall through */
case cpu_to_be32(XFS_IBT_MAGIC):
@@ -254,24 +246,12 @@ xfs_inobt_verify(
return 0;
}
- /* numrecs and level verification */
+ /* level verification */
level = be16_to_cpu(block->bb_level);
if (level >= mp->m_in_maxlevels)
return false;
- if (be16_to_cpu(block->bb_numrecs) > mp->m_inobt_mxr[level != 0])
- return false;
-
- /* sibling pointer verification */
- if (!block->bb_u.s.bb_leftsib ||
- (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
- block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
- return false;
- if (!block->bb_u.s.bb_rightsib ||
- (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
- block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
- return false;
- return true;
+ return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]);
}
static void
@@ -304,6 +284,7 @@ xfs_inobt_write_verify(
}
const struct xfs_buf_ops xfs_inobt_buf_ops = {
+ .name = "xfs_inobt",
.verify_read = xfs_inobt_read_verify,
.verify_write = xfs_inobt_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 65485cfc4..1aabfda66 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -68,6 +68,8 @@ xfs_inobp_check(
* recovery and we don't get unnecssary panics on debug kernels. We use EIO here
* because all we want to do is say readahead failed; there is no-one to report
* the error to, so this will distinguish it from a non-ra verifier failure.
+ * Changes to this readahead error behaviour also need to be reflected in
+ * xfs_dquot_buf_readahead_verify().
*/
static void
xfs_inode_buf_verify(
@@ -134,11 +136,13 @@ xfs_inode_buf_write_verify(
}
const struct xfs_buf_ops xfs_inode_buf_ops = {
+ .name = "xfs_inode",
.verify_read = xfs_inode_buf_read_verify,
.verify_write = xfs_inode_buf_write_verify,
};
const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
+ .name = "xfs_inode_ra", /* ops name, used in diagnostics */
.verify_read = xfs_inode_buf_readahead_verify,
.verify_write = xfs_inode_buf_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 1c55ccbb3..8e385f91d 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -60,6 +60,7 @@ typedef struct xlog_recover {
*/
#define XLOG_BC_TABLE_SIZE 64
+#define XLOG_RECOVER_CRCPASS 0
#define XLOG_RECOVER_PASS1 1
#define XLOG_RECOVER_PASS2 2
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index 1b0a08379..f51078f1e 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -153,7 +153,7 @@ typedef __uint16_t xfs_qwarncnt_t;
#define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq,
- xfs_dqid_t id, uint type, uint flags, char *str);
+ xfs_dqid_t id, uint type, uint flags, const char *str);
extern int xfs_calc_dquots_per_chunk(unsigned int nbblks);
#endif /* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index a0b071d88..8a53eaa34 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -679,11 +679,13 @@ xfs_sb_write_verify(
}
const struct xfs_buf_ops xfs_sb_buf_ops = {
+ .name = "xfs_sb",
.verify_read = xfs_sb_read_verify,
.verify_write = xfs_sb_write_verify,
};
const struct xfs_buf_ops xfs_sb_quiet_buf_ops = {
+ .name = "xfs_sb_quiet",
.verify_read = xfs_sb_quiet_read_verify,
.verify_write = xfs_sb_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 5be529707..15c3ceb84 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -49,6 +49,7 @@ extern const struct xfs_buf_ops xfs_inobt_buf_ops;
extern const struct xfs_buf_ops xfs_inode_buf_ops;
extern const struct xfs_buf_ops xfs_inode_buf_ra_ops;
extern const struct xfs_buf_ops xfs_dquot_buf_ops;
+extern const struct xfs_buf_ops xfs_dquot_buf_ra_ops;
extern const struct xfs_buf_ops xfs_sb_buf_ops;
extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
extern const struct xfs_buf_ops xfs_symlink_buf_ops;
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index cb6fd20a4..2e2c6716b 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -168,6 +168,7 @@ xfs_symlink_write_verify(
}
const struct xfs_buf_ops xfs_symlink_buf_ops = {
+ .name = "xfs_symlink",
.verify_read = xfs_symlink_read_verify,
.verify_write = xfs_symlink_write_verify,
};
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 6bb470fbb..2d5df1f23 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -252,29 +252,6 @@ xfs_set_mode(struct inode *inode, umode_t mode)
return error;
}
-static int
-xfs_acl_exists(struct inode *inode, unsigned char *name)
-{
- int len = XFS_ACL_MAX_SIZE(XFS_M(inode->i_sb));
-
- return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
- ATTR_ROOT|ATTR_KERNOVAL) == 0);
-}
-
-int
-posix_acl_access_exists(struct inode *inode)
-{
- return xfs_acl_exists(inode, SGI_ACL_FILE);
-}
-
-int
-posix_acl_default_exists(struct inode *inode)
-{
- if (!S_ISDIR(inode->i_mode))
- return 0;
- return xfs_acl_exists(inode, SGI_ACL_DEFAULT);
-}
-
int
xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 52f8255d6..286fa8921 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -24,16 +24,12 @@ struct posix_acl;
#ifdef CONFIG_XFS_POSIX_ACL
extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
extern int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
-extern int posix_acl_access_exists(struct inode *inode);
-extern int posix_acl_default_exists(struct inode *inode);
#else
static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type)
{
return NULL;
}
# define xfs_set_acl NULL
-# define posix_acl_access_exists(inode) 0
-# define posix_acl_default_exists(inode) 0
#endif /* CONFIG_XFS_POSIX_ACL */
extern void xfs_forget_acl(struct inode *inode, const char *name, int xflags);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 29e7e5dd5..a9ebabfe7 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -55,7 +55,7 @@ xfs_count_page_state(
} while ((bh = bh->b_this_page) != head);
}
-STATIC struct block_device *
+struct block_device *
xfs_find_bdev_for_inode(
struct inode *inode)
{
@@ -1208,6 +1208,10 @@ xfs_vm_writepages(
struct writeback_control *wbc)
{
xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
+ if (dax_mapping(mapping))
+ return dax_writeback_mapping_range(mapping,
+ xfs_find_bdev_for_inode(mapping->host), wbc);
+
return generic_writepages(mapping, wbc);
}
@@ -1917,6 +1921,7 @@ xfs_vm_readpage(
struct file *unused,
struct page *page)
{
+ trace_xfs_vm_readpage(page->mapping->host, 1);
return mpage_readpage(page, xfs_get_blocks);
}
@@ -1927,6 +1932,7 @@ xfs_vm_readpages(
struct list_head *pages,
unsigned nr_pages)
{
+ trace_xfs_vm_readpages(mapping->host, nr_pages);
return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
}
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index f6ffc9ae5..a4343c63f 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -62,5 +62,6 @@ int xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
struct buffer_head *map_bh, int create);
extern void xfs_count_page_state(struct page *, int *, int *);
+extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index dbae6490a..6c876012b 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -75,7 +75,8 @@ xfs_zero_extent(
ssize_t size = XFS_FSB_TO_B(mp, count_fsb);
if (IS_DAX(VFS_I(ip)))
- return dax_clear_blocks(VFS_I(ip), block, size);
+ return dax_clear_sectors(xfs_find_bdev_for_inode(VFS_I(ip)),
+ sector, size);
/*
* let the block layer decide on the fastest method of
@@ -91,32 +92,32 @@ xfs_zero_extent(
* last due to locking considerations. We never free any extents in
* the first transaction.
*
- * Return 1 if the given transaction was committed and a new one
- * started, and 0 otherwise in the committed parameter.
+ * If an inode *ip is provided, rejoin it to the transaction if
+ * the transaction was committed.
*/
int /* error */
xfs_bmap_finish(
struct xfs_trans **tp, /* transaction pointer addr */
struct xfs_bmap_free *flist, /* i/o: list extents to free */
- int *committed)/* xact committed or not */
+ struct xfs_inode *ip)
{
struct xfs_efd_log_item *efd; /* extent free data */
struct xfs_efi_log_item *efi; /* extent free intention */
int error; /* error return value */
+ int committed;/* xact committed or not */
struct xfs_bmap_free_item *free; /* free extent item */
struct xfs_bmap_free_item *next; /* next item on free list */
ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
- if (flist->xbf_count == 0) {
- *committed = 0;
+ if (flist->xbf_count == 0)
return 0;
- }
+
efi = xfs_trans_get_efi(*tp, flist->xbf_count);
for (free = flist->xbf_first; free; free = free->xbfi_next)
xfs_trans_log_efi_extent(*tp, efi, free->xbfi_startblock,
free->xbfi_blockcount);
- error = __xfs_trans_roll(tp, NULL, committed);
+ error = __xfs_trans_roll(tp, ip, &committed);
if (error) {
/*
* If the transaction was committed, drop the EFD reference
@@ -128,16 +129,13 @@ xfs_bmap_finish(
* transaction so we should return committed=1 even though we're
* returning an error.
*/
- if (*committed) {
+ if (committed) {
xfs_efi_release(efi);
xfs_force_shutdown((*tp)->t_mountp,
(error == -EFSCORRUPTED) ?
SHUTDOWN_CORRUPT_INCORE :
SHUTDOWN_META_IO_ERROR);
- } else {
- *committed = 1;
}
-
return error;
}
@@ -969,7 +967,6 @@ xfs_alloc_file_space(
xfs_bmbt_irec_t imaps[1], *imapp;
xfs_bmap_free_t free_list;
uint qblocks, resblks, resrtextents;
- int committed;
int error;
trace_xfs_alloc_file_space(ip);
@@ -1064,23 +1061,20 @@ xfs_alloc_file_space(
error = xfs_bmapi_write(tp, ip, startoffset_fsb,
allocatesize_fsb, alloc_type, &firstfsb,
resblks, imapp, &nimaps, &free_list);
- if (error) {
+ if (error)
goto error0;
- }
/*
* Complete the transaction
*/
- error = xfs_bmap_finish(&tp, &free_list, &committed);
- if (error) {
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
+ if (error)
goto error0;
- }
error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- if (error) {
+ if (error)
break;
- }
allocated_fsb = imapp->br_blockcount;
@@ -1206,7 +1200,6 @@ xfs_free_file_space(
xfs_off_t offset,
xfs_off_t len)
{
- int committed;
int done;
xfs_fileoff_t endoffset_fsb;
int error;
@@ -1346,17 +1339,15 @@ xfs_free_file_space(
error = xfs_bunmapi(tp, ip, startoffset_fsb,
endoffset_fsb - startoffset_fsb,
0, 2, &firstfsb, &free_list, &done);
- if (error) {
+ if (error)
goto error0;
- }
/*
* complete the transaction
*/
- error = xfs_bmap_finish(&tp, &free_list, &committed);
- if (error) {
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
+ if (error)
goto error0;
- }
error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -1434,7 +1425,6 @@ xfs_shift_file_space(
int error;
struct xfs_bmap_free free_list;
xfs_fsblock_t first_block;
- int committed;
xfs_fileoff_t stop_fsb;
xfs_fileoff_t next_fsb;
xfs_fileoff_t shift_fsb;
@@ -1526,7 +1516,7 @@ xfs_shift_file_space(
if (error)
goto out_bmap_cancel;
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto out_bmap_cancel;
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 39090fc56..435c7de42 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1052,7 +1052,7 @@ xfs_buf_ioend_work(
xfs_buf_ioend(bp);
}
-void
+static void
xfs_buf_ioend_async(
struct xfs_buf *bp)
{
@@ -1649,13 +1649,9 @@ xfs_setsize_buftarg(
btp->bt_meta_sectormask = sectorsize - 1;
if (set_blocksize(btp->bt_bdev, sectorsize)) {
- char name[BDEVNAME_SIZE];
-
- bdevname(btp->bt_bdev, name);
-
xfs_warn(btp->bt_mount,
- "Cannot set_blocksize to %u on device %s",
- sectorsize, name);
+ "Cannot set_blocksize to %u on device %pg",
+ sectorsize, btp->bt_bdev);
return -EINVAL;
}
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index c79b717d9..c75721acd 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -132,6 +132,7 @@ struct xfs_buf_map {
struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) };
struct xfs_buf_ops {
+ const char *name; /* ops name; only ever set from string literals */
void (*verify_read)(struct xfs_buf *);
void (*verify_write)(struct xfs_buf *);
};
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 7ac6c5c58..9c44d38dc 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -306,7 +306,7 @@ xfs_qm_dqalloc(
xfs_fsblock_t firstblock;
xfs_bmap_free_t flist;
xfs_bmbt_irec_t map;
- int nmaps, error, committed;
+ int nmaps, error;
xfs_buf_t *bp;
xfs_trans_t *tp = *tpp;
@@ -379,11 +379,12 @@ xfs_qm_dqalloc(
xfs_trans_bhold(tp, bp);
- if ((error = xfs_bmap_finish(tpp, &flist, &committed))) {
+ error = xfs_bmap_finish(tpp, &flist, NULL);
+ if (error)
goto error1;
- }
- if (committed) {
+ /* Transaction was committed? */
+ if (*tpp != tp) {
tp = *tpp;
xfs_trans_bjoin(tp, bp);
} else {
@@ -393,9 +394,9 @@ xfs_qm_dqalloc(
*O_bpp = bp;
return 0;
- error1:
+error1:
xfs_bmap_cancel(&flist);
- error0:
+error0:
xfs_iunlock(quotip, XFS_ILOCK_EXCL);
return error;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 74d0e5966..88693a98f 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -164,9 +164,9 @@ xfs_verifier_error(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx",
+ xfs_alert(mp, "Metadata %s detected at %pF, %s block 0x%llx",
bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
- __return_address, bp->b_bn);
+ __return_address, bp->b_ops->name, bp->b_bn);
xfs_alert(mp, "Unmount and run xfs_repair");
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f5392ab2d..52883ac3c 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -55,7 +55,7 @@ xfs_rw_ilock(
int type)
{
if (type & XFS_IOLOCK_EXCL)
- mutex_lock(&VFS_I(ip)->i_mutex);
+ inode_lock(VFS_I(ip));
xfs_ilock(ip, type);
}
@@ -66,7 +66,7 @@ xfs_rw_iunlock(
{
xfs_iunlock(ip, type);
if (type & XFS_IOLOCK_EXCL)
- mutex_unlock(&VFS_I(ip)->i_mutex);
+ inode_unlock(VFS_I(ip));
}
static inline void
@@ -76,7 +76,7 @@ xfs_rw_ilock_demote(
{
xfs_ilock_demote(ip, type);
if (type & XFS_IOLOCK_EXCL)
- mutex_unlock(&VFS_I(ip)->i_mutex);
+ inode_unlock(VFS_I(ip));
}
/*
@@ -402,19 +402,26 @@ xfs_file_splice_read(
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
- xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-
trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
- /* for dax, we need to avoid the page cache */
- if (IS_DAX(VFS_I(ip)))
- ret = default_file_splice_read(infilp, ppos, pipe, count, flags);
- else
- ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
- if (ret > 0)
- XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret);
+ /*
+ * DAX inodes cannot use the page cache for splice, so we have to push
+ * them through the VFS IO path. This means it goes through
+ * ->read_iter, which for us takes the XFS_IOLOCK_SHARED. Hence we
+ * cannot lock the splice operation at this level for DAX inodes.
+ */
+ if (IS_DAX(VFS_I(ip))) {
+ ret = default_file_splice_read(infilp, ppos, pipe, count,
+ flags);
+ goto out;
+ }
+ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+ ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+out:
+ if (ret > 0)
+ XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret);
return ret;
}
@@ -1603,9 +1610,8 @@ xfs_filemap_pmd_fault(
/*
* pfn_mkwrite was originally inteneded to ensure we capture time stamp
* updates on write faults. In reality, it's need to serialise against
- * truncate similar to page_mkwrite. Hence we open-code dax_pfn_mkwrite()
- * here and cycle the XFS_MMAPLOCK_SHARED to ensure we serialise the fault
- * barrier in place.
+ * truncate similar to page_mkwrite. Hence we cycle the XFS_MMAPLOCK_SHARED
+ * to ensure we serialise the fault barrier in place.
*/
static int
xfs_filemap_pfn_mkwrite(
@@ -1628,6 +1634,8 @@ xfs_filemap_pfn_mkwrite(
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
+ else if (IS_DAX(inode))
+ ret = dax_pfn_mkwrite(vma, vmf);
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
sb_end_pagefault(inode->i_sb);
return ret;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 8ee393996..ceba1a83c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -610,60 +610,69 @@ __xfs_iflock(
STATIC uint
_xfs_dic2xflags(
- __uint16_t di_flags)
+ __uint16_t di_flags,
+ uint64_t di_flags2,
+ bool has_attr)
{
uint flags = 0;
if (di_flags & XFS_DIFLAG_ANY) {
if (di_flags & XFS_DIFLAG_REALTIME)
- flags |= XFS_XFLAG_REALTIME;
+ flags |= FS_XFLAG_REALTIME;
if (di_flags & XFS_DIFLAG_PREALLOC)
- flags |= XFS_XFLAG_PREALLOC;
+ flags |= FS_XFLAG_PREALLOC;
if (di_flags & XFS_DIFLAG_IMMUTABLE)
- flags |= XFS_XFLAG_IMMUTABLE;
+ flags |= FS_XFLAG_IMMUTABLE;
if (di_flags & XFS_DIFLAG_APPEND)
- flags |= XFS_XFLAG_APPEND;
+ flags |= FS_XFLAG_APPEND;
if (di_flags & XFS_DIFLAG_SYNC)
- flags |= XFS_XFLAG_SYNC;
+ flags |= FS_XFLAG_SYNC;
if (di_flags & XFS_DIFLAG_NOATIME)
- flags |= XFS_XFLAG_NOATIME;
+ flags |= FS_XFLAG_NOATIME;
if (di_flags & XFS_DIFLAG_NODUMP)
- flags |= XFS_XFLAG_NODUMP;
+ flags |= FS_XFLAG_NODUMP;
if (di_flags & XFS_DIFLAG_RTINHERIT)
- flags |= XFS_XFLAG_RTINHERIT;
+ flags |= FS_XFLAG_RTINHERIT;
if (di_flags & XFS_DIFLAG_PROJINHERIT)
- flags |= XFS_XFLAG_PROJINHERIT;
+ flags |= FS_XFLAG_PROJINHERIT;
if (di_flags & XFS_DIFLAG_NOSYMLINKS)
- flags |= XFS_XFLAG_NOSYMLINKS;
+ flags |= FS_XFLAG_NOSYMLINKS;
if (di_flags & XFS_DIFLAG_EXTSIZE)
- flags |= XFS_XFLAG_EXTSIZE;
+ flags |= FS_XFLAG_EXTSIZE;
if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
- flags |= XFS_XFLAG_EXTSZINHERIT;
+ flags |= FS_XFLAG_EXTSZINHERIT;
if (di_flags & XFS_DIFLAG_NODEFRAG)
- flags |= XFS_XFLAG_NODEFRAG;
+ flags |= FS_XFLAG_NODEFRAG;
if (di_flags & XFS_DIFLAG_FILESTREAM)
- flags |= XFS_XFLAG_FILESTREAM;
+ flags |= FS_XFLAG_FILESTREAM;
}
+ if (di_flags2 & XFS_DIFLAG2_ANY) {
+ if (di_flags2 & XFS_DIFLAG2_DAX)
+ flags |= FS_XFLAG_DAX;
+ }
+
+ if (has_attr)
+ flags |= FS_XFLAG_HASATTR;
+
return flags;
}
uint
xfs_ip2xflags(
- xfs_inode_t *ip)
+ struct xfs_inode *ip)
{
- xfs_icdinode_t *dic = &ip->i_d;
+ struct xfs_icdinode *dic = &ip->i_d;
- return _xfs_dic2xflags(dic->di_flags) |
- (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0);
+ return _xfs_dic2xflags(dic->di_flags, dic->di_flags2, XFS_IFORK_Q(ip));
}
uint
xfs_dic2xflags(
- xfs_dinode_t *dip)
+ struct xfs_dinode *dip)
{
- return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) |
- (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
+ return _xfs_dic2xflags(be16_to_cpu(dip->di_flags),
+ be64_to_cpu(dip->di_flags2), XFS_DFORK_Q(dip));
}
/*
@@ -862,7 +871,8 @@ xfs_ialloc(
case S_IFREG:
case S_IFDIR:
if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
- uint di_flags = 0;
+ uint64_t di_flags2 = 0;
+ uint di_flags = 0;
if (S_ISDIR(mode)) {
if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
@@ -898,7 +908,11 @@ xfs_ialloc(
di_flags |= XFS_DIFLAG_NODEFRAG;
if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
di_flags |= XFS_DIFLAG_FILESTREAM;
+ if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
+ di_flags2 |= XFS_DIFLAG2_DAX;
+
ip->i_d.di_flags |= di_flags;
+ ip->i_d.di_flags2 |= di_flags2;
}
/* FALLTHROUGH */
case S_IFLNK:
@@ -1143,7 +1157,6 @@ xfs_create(
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
bool unlock_dp_on_error = false;
- int committed;
prid_t prid;
struct xfs_dquot *udqp = NULL;
struct xfs_dquot *gdqp = NULL;
@@ -1226,7 +1239,7 @@ xfs_create(
* pointing to itself.
*/
error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
- prid, resblks > 0, &ip, &committed);
+ prid, resblks > 0, &ip, NULL);
if (error)
goto out_trans_cancel;
@@ -1275,7 +1288,7 @@ xfs_create(
*/
xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto out_bmap_cancel;
@@ -1427,7 +1440,6 @@ xfs_link(
int error;
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
- int committed;
int resblks;
trace_xfs_link(tdp, target_name);
@@ -1502,11 +1514,10 @@ xfs_link(
* link transaction goes to disk before returning to
* the user.
*/
- if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
+ if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
xfs_trans_set_sync(tp);
- }
- error = xfs_bmap_finish (&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error) {
xfs_bmap_cancel(&free_list);
goto error_return;
@@ -1555,7 +1566,6 @@ xfs_itruncate_extents(
xfs_fileoff_t first_unmap_block;
xfs_fileoff_t last_block;
xfs_filblks_t unmap_len;
- int committed;
int error = 0;
int done = 0;
@@ -1601,9 +1611,7 @@ xfs_itruncate_extents(
* Duplicate the transaction that has the permanent
* reservation and commit the old transaction.
*/
- error = xfs_bmap_finish(&tp, &free_list, &committed);
- if (committed)
- xfs_trans_ijoin(tp, ip, 0);
+ error = xfs_bmap_finish(&tp, &free_list, ip);
if (error)
goto out_bmap_cancel;
@@ -1774,7 +1782,6 @@ xfs_inactive_ifree(
{
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
- int committed;
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
int error;
@@ -1841,7 +1848,7 @@ xfs_inactive_ifree(
* Just ignore errors at this point. There is nothing we can do except
* to try to keep going. Make sure it's not a silent error.
*/
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error) {
xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
__func__, error);
@@ -2523,7 +2530,6 @@ xfs_remove(
int error = 0;
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
- int committed;
uint resblks;
trace_xfs_remove(dp, name);
@@ -2624,7 +2630,7 @@ xfs_remove(
if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
xfs_trans_set_sync(tp);
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto out_bmap_cancel;
@@ -2701,7 +2707,6 @@ xfs_finish_rename(
struct xfs_trans *tp,
struct xfs_bmap_free *free_list)
{
- int committed = 0;
int error;
/*
@@ -2711,7 +2716,7 @@ xfs_finish_rename(
if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
xfs_trans_set_sync(tp);
- error = xfs_bmap_finish(&tp, free_list, &committed);
+ error = xfs_bmap_finish(&tp, free_list, NULL);
if (error) {
xfs_bmap_cancel(free_list);
xfs_trans_cancel(tp);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index d42738dee..478d04e07 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -859,25 +859,25 @@ xfs_merge_ioc_xflags(
unsigned int xflags = start;
if (flags & FS_IMMUTABLE_FL)
- xflags |= XFS_XFLAG_IMMUTABLE;
+ xflags |= FS_XFLAG_IMMUTABLE;
else
- xflags &= ~XFS_XFLAG_IMMUTABLE;
+ xflags &= ~FS_XFLAG_IMMUTABLE;
if (flags & FS_APPEND_FL)
- xflags |= XFS_XFLAG_APPEND;
+ xflags |= FS_XFLAG_APPEND;
else
- xflags &= ~XFS_XFLAG_APPEND;
+ xflags &= ~FS_XFLAG_APPEND;
if (flags & FS_SYNC_FL)
- xflags |= XFS_XFLAG_SYNC;
+ xflags |= FS_XFLAG_SYNC;
else
- xflags &= ~XFS_XFLAG_SYNC;
+ xflags &= ~FS_XFLAG_SYNC;
if (flags & FS_NOATIME_FL)
- xflags |= XFS_XFLAG_NOATIME;
+ xflags |= FS_XFLAG_NOATIME;
else
- xflags &= ~XFS_XFLAG_NOATIME;
+ xflags &= ~FS_XFLAG_NOATIME;
if (flags & FS_NODUMP_FL)
- xflags |= XFS_XFLAG_NODUMP;
+ xflags |= FS_XFLAG_NODUMP;
else
- xflags &= ~XFS_XFLAG_NODUMP;
+ xflags &= ~FS_XFLAG_NODUMP;
return xflags;
}
@@ -945,40 +945,51 @@ xfs_set_diflags(
unsigned int xflags)
{
unsigned int di_flags;
+ uint64_t di_flags2;
/* can't set PREALLOC this way, just preserve it */
di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
- if (xflags & XFS_XFLAG_IMMUTABLE)
+ if (xflags & FS_XFLAG_IMMUTABLE)
di_flags |= XFS_DIFLAG_IMMUTABLE;
- if (xflags & XFS_XFLAG_APPEND)
+ if (xflags & FS_XFLAG_APPEND)
di_flags |= XFS_DIFLAG_APPEND;
- if (xflags & XFS_XFLAG_SYNC)
+ if (xflags & FS_XFLAG_SYNC)
di_flags |= XFS_DIFLAG_SYNC;
- if (xflags & XFS_XFLAG_NOATIME)
+ if (xflags & FS_XFLAG_NOATIME)
di_flags |= XFS_DIFLAG_NOATIME;
- if (xflags & XFS_XFLAG_NODUMP)
+ if (xflags & FS_XFLAG_NODUMP)
di_flags |= XFS_DIFLAG_NODUMP;
- if (xflags & XFS_XFLAG_NODEFRAG)
+ if (xflags & FS_XFLAG_NODEFRAG)
di_flags |= XFS_DIFLAG_NODEFRAG;
- if (xflags & XFS_XFLAG_FILESTREAM)
+ if (xflags & FS_XFLAG_FILESTREAM)
di_flags |= XFS_DIFLAG_FILESTREAM;
if (S_ISDIR(ip->i_d.di_mode)) {
- if (xflags & XFS_XFLAG_RTINHERIT)
+ if (xflags & FS_XFLAG_RTINHERIT)
di_flags |= XFS_DIFLAG_RTINHERIT;
- if (xflags & XFS_XFLAG_NOSYMLINKS)
+ if (xflags & FS_XFLAG_NOSYMLINKS)
di_flags |= XFS_DIFLAG_NOSYMLINKS;
- if (xflags & XFS_XFLAG_EXTSZINHERIT)
+ if (xflags & FS_XFLAG_EXTSZINHERIT)
di_flags |= XFS_DIFLAG_EXTSZINHERIT;
- if (xflags & XFS_XFLAG_PROJINHERIT)
+ if (xflags & FS_XFLAG_PROJINHERIT)
di_flags |= XFS_DIFLAG_PROJINHERIT;
} else if (S_ISREG(ip->i_d.di_mode)) {
- if (xflags & XFS_XFLAG_REALTIME)
+ if (xflags & FS_XFLAG_REALTIME)
di_flags |= XFS_DIFLAG_REALTIME;
- if (xflags & XFS_XFLAG_EXTSIZE)
+ if (xflags & FS_XFLAG_EXTSIZE)
di_flags |= XFS_DIFLAG_EXTSIZE;
}
-
ip->i_d.di_flags = di_flags;
+
+ /* diflags2 only valid for v3 inodes. */
+ if (ip->i_d.di_version < 3)
+ return;
+
+ di_flags2 = 0;
+ if (xflags & FS_XFLAG_DAX)
+ di_flags2 |= XFS_DIFLAG2_DAX;
+
+ ip->i_d.di_flags2 = di_flags2;
+
}
STATIC void
@@ -988,22 +999,27 @@ xfs_diflags_to_linux(
struct inode *inode = VFS_I(ip);
unsigned int xflags = xfs_ip2xflags(ip);
- if (xflags & XFS_XFLAG_IMMUTABLE)
+ if (xflags & FS_XFLAG_IMMUTABLE)
inode->i_flags |= S_IMMUTABLE;
else
inode->i_flags &= ~S_IMMUTABLE;
- if (xflags & XFS_XFLAG_APPEND)
+ if (xflags & FS_XFLAG_APPEND)
inode->i_flags |= S_APPEND;
else
inode->i_flags &= ~S_APPEND;
- if (xflags & XFS_XFLAG_SYNC)
+ if (xflags & FS_XFLAG_SYNC)
inode->i_flags |= S_SYNC;
else
inode->i_flags &= ~S_SYNC;
- if (xflags & XFS_XFLAG_NOATIME)
+ if (xflags & FS_XFLAG_NOATIME)
inode->i_flags |= S_NOATIME;
else
inode->i_flags &= ~S_NOATIME;
+ if (xflags & FS_XFLAG_DAX)
+ inode->i_flags |= S_DAX;
+ else
+ inode->i_flags &= ~S_DAX;
+
}
static int
@@ -1016,11 +1032,11 @@ xfs_ioctl_setattr_xflags(
/* Can't change realtime flag if any extents are allocated. */
if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
- XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & XFS_XFLAG_REALTIME))
+ XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME))
return -EINVAL;
/* If realtime flag is set then must have realtime device */
- if (fa->fsx_xflags & XFS_XFLAG_REALTIME) {
+ if (fa->fsx_xflags & FS_XFLAG_REALTIME) {
if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 ||
(ip->i_d.di_extsize % mp->m_sb.sb_rextsize))
return -EINVAL;
@@ -1031,7 +1047,7 @@ xfs_ioctl_setattr_xflags(
* we have appropriate permission.
*/
if (((ip->i_d.di_flags & (XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND)) ||
- (fa->fsx_xflags & (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
+ (fa->fsx_xflags & (FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND))) &&
!capable(CAP_LINUX_IMMUTABLE))
return -EPERM;
@@ -1095,8 +1111,8 @@ out_cancel:
* extent size hint validation is somewhat cumbersome. Rules are:
*
* 1. extent size hint is only valid for directories and regular files
- * 2. XFS_XFLAG_EXTSIZE is only valid for regular files
- * 3. XFS_XFLAG_EXTSZINHERIT is only valid for directories.
+ * 2. FS_XFLAG_EXTSIZE is only valid for regular files
+ * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
* 4. can only be changed on regular files if no extents are allocated
* 5. can be changed on directories at any time
* 6. extsize hint of 0 turns off hints, clears inode flags.
@@ -1112,10 +1128,10 @@ xfs_ioctl_setattr_check_extsize(
{
struct xfs_mount *mp = ip->i_mount;
- if ((fa->fsx_xflags & XFS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode))
+ if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode))
return -EINVAL;
- if ((fa->fsx_xflags & XFS_XFLAG_EXTSZINHERIT) &&
+ if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
!S_ISDIR(ip->i_d.di_mode))
return -EINVAL;
@@ -1132,7 +1148,7 @@ xfs_ioctl_setattr_check_extsize(
return -EINVAL;
if (XFS_IS_REALTIME_INODE(ip) ||
- (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+ (fa->fsx_xflags & FS_XFLAG_REALTIME)) {
size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
} else {
size = mp->m_sb.sb_blocksize;
@@ -1143,7 +1159,7 @@ xfs_ioctl_setattr_check_extsize(
if (fa->fsx_extsize % size)
return -EINVAL;
} else
- fa->fsx_xflags &= ~(XFS_XFLAG_EXTSIZE | XFS_XFLAG_EXTSZINHERIT);
+ fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT);
return 0;
}
@@ -1168,7 +1184,7 @@ xfs_ioctl_setattr_check_projid(
if (xfs_get_projid(ip) != fa->fsx_projid)
return -EINVAL;
- if ((fa->fsx_xflags & XFS_XFLAG_PROJINHERIT) !=
+ if ((fa->fsx_xflags & FS_XFLAG_PROJINHERIT) !=
(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT))
return -EINVAL;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index f4f5b43cf..d81bdc080 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -129,7 +129,6 @@ xfs_iomap_write_direct(
xfs_trans_t *tp;
xfs_bmap_free_t free_list;
uint qblocks, resblks, resrtextents;
- int committed;
int error;
int lockmode;
int bmapi_flags = XFS_BMAPI_PREALLOC;
@@ -203,15 +202,20 @@ xfs_iomap_write_direct(
* this outside the transaction context, but if we commit and then crash
* we may not have zeroed the blocks and this will be exposed on
* recovery of the allocation. Hence we must zero before commit.
+ *
* Further, if we are mapping unwritten extents here, we need to zero
* and convert them to written so that we don't need an unwritten extent
* callback for DAX. This also means that we need to be able to dip into
- * the reserve block pool if there is no space left but we need to do
- * unwritten extent conversion.
+ * the reserve block pool for bmbt block allocation if there is no space
+ * left but we need to do unwritten extent conversion.
*/
+
if (IS_DAX(VFS_I(ip))) {
bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
- tp->t_flags |= XFS_TRANS_RESERVE;
+ if (ISUNWRITTEN(imap)) {
+ tp->t_flags |= XFS_TRANS_RESERVE;
+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
+ }
}
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
resblks, resrtextents);
@@ -247,7 +251,7 @@ xfs_iomap_write_direct(
/*
* Complete the transaction
*/
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto out_bmap_cancel;
@@ -693,7 +697,7 @@ xfs_iomap_write_allocate(
xfs_bmap_free_t free_list;
xfs_filblks_t count_fsb;
xfs_trans_t *tp;
- int nimaps, committed;
+ int nimaps;
int error = 0;
int nres;
@@ -794,7 +798,7 @@ xfs_iomap_write_allocate(
if (error)
goto trans_cancel;
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto trans_cancel;
@@ -852,7 +856,6 @@ xfs_iomap_write_unwritten(
xfs_bmap_free_t free_list;
xfs_fsize_t i_size;
uint resblks;
- int committed;
int error;
trace_xfs_unwritten_convert(ip, offset, count);
@@ -924,7 +927,7 @@ xfs_iomap_write_unwritten(
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto error_on_bmapi_transaction;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 245268a0c..76b71a1c6 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -414,13 +414,17 @@ xfs_vn_rename(
* uio is kmalloced for this reason...
*/
STATIC const char *
-xfs_vn_follow_link(
+xfs_vn_get_link(
struct dentry *dentry,
- void **cookie)
+ struct inode *inode,
+ struct delayed_call *done)
{
char *link;
int error = -ENOMEM;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
if (!link)
goto out_err;
@@ -429,7 +433,8 @@ xfs_vn_follow_link(
if (unlikely(error))
goto out_kfree;
- return *cookie = link;
+ set_delayed_call(done, kfree_link, link);
+ return link;
out_kfree:
kfree(link);
@@ -1172,8 +1177,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
static const struct inode_operations xfs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = xfs_vn_follow_link,
- .put_link = kfree_put_link,
+ .get_link = xfs_vn_get_link,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.setxattr = generic_setxattr,
@@ -1201,8 +1205,8 @@ xfs_diflags_to_iflags(
inode->i_flags |= S_SYNC;
if (flags & XFS_DIFLAG_NOATIME)
inode->i_flags |= S_NOATIME;
- /* XXX: Also needs an on-disk per inode flag! */
- if (ip->i_mount->m_flags & XFS_MOUNT_DAX)
+ if (ip->i_mount->m_flags & XFS_MOUNT_DAX ||
+ ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
inode->i_flags |= S_DAX;
}
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index f52c72a1a..9c9a1c9bc 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1188,10 +1188,16 @@ xlog_iodone(xfs_buf_t *bp)
int aborted = 0;
/*
- * Race to shutdown the filesystem if we see an error.
+ * Race to shutdown the filesystem if we see an error or the iclog is in
+ * IOABORT state. The IOABORT state is only set in DEBUG mode to inject
+ * CRC errors into log recovery.
*/
- if (XFS_TEST_ERROR(bp->b_error, l->l_mp,
- XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) {
+ if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR,
+ XFS_RANDOM_IODONE_IOERR) ||
+ iclog->ic_state & XLOG_STATE_IOABORT) {
+ if (iclog->ic_state & XLOG_STATE_IOABORT)
+ iclog->ic_state &= ~XLOG_STATE_IOABORT;
+
xfs_buf_ioerror_alert(bp, __func__);
xfs_buf_stale(bp);
xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR);
@@ -1838,6 +1844,23 @@ xlog_sync(
/* calculcate the checksum */
iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
iclog->ic_datap, size);
+#ifdef DEBUG
+ /*
+ * Intentionally corrupt the log record CRC based on the error injection
+ * frequency, if defined. This facilitates testing log recovery in the
+ * event of torn writes. Hence, set the IOABORT state to abort the log
+ * write on I/O completion and shutdown the fs. The subsequent mount
+ * detects the bad CRC and attempts to recover.
+ */
+ if (log->l_badcrc_factor &&
+ (prandom_u32() % log->l_badcrc_factor == 0)) {
+ iclog->ic_header.h_crc &= 0xAAAAAAAA;
+ iclog->ic_state |= XLOG_STATE_IOABORT;
+ xfs_warn(log->l_mp,
+ "Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
+ be64_to_cpu(iclog->ic_header.h_lsn));
+ }
+#endif
bp->b_io_length = BTOBB(count);
bp->b_fspriv = iclog;
@@ -2045,12 +2068,14 @@ xlog_print_tic_res(
"QM_DQCLUSTER",
"QM_QINOCREATE",
"QM_QUOTAOFF_END",
- "SB_UNIT",
"FSYNC_TS",
"GROWFSRT_ALLOC",
"GROWFSRT_ZERO",
"GROWFSRT_FREE",
- "SWAPEXT"
+ "SWAPEXT",
+ "CHECKPOINT",
+ "ICREATE",
+ "CREATE_TMPFILE"
};
xfs_warn(mp, "xlog_write: reservation summary:");
@@ -2791,11 +2816,19 @@ xlog_state_do_callback(
}
} while (!ioerrors && loopdidcallbacks);
+#ifdef DEBUG
/*
- * make one last gasp attempt to see if iclogs are being left in
- * limbo..
+ * Make one last gasp attempt to see if iclogs are being left in limbo.
+ * If the above loop finds an iclog earlier than the current iclog and
+ * in one of the syncing states, the current iclog is put into
+ * DO_CALLBACK and the callbacks are deferred to the completion of the
+ * earlier iclog. Walk the iclogs in order and make sure that no iclog
+ * is in DO_CALLBACK unless an earlier iclog is in one of the syncing
+ * states.
+ *
+ * Note that SYNCING|IOABORT is a valid state so we cannot just check
+ * for ic_state == SYNCING.
*/
-#ifdef DEBUG
if (funcdidcallbacks) {
first_iclog = iclog = log->l_iclog;
do {
@@ -2810,7 +2843,7 @@ xlog_state_do_callback(
* IOERROR - give up hope all ye who enter here
*/
if (iclog->ic_state == XLOG_STATE_WANT_SYNC ||
- iclog->ic_state == XLOG_STATE_SYNCING ||
+ iclog->ic_state & XLOG_STATE_SYNCING ||
iclog->ic_state == XLOG_STATE_DONE_SYNC ||
iclog->ic_state == XLOG_STATE_IOERROR )
break;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 8daba7491..ed8896310 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -62,6 +62,7 @@ static inline uint xlog_get_client_id(__be32 i)
#define XLOG_STATE_CALLBACK 0x0020 /* Callback functions now */
#define XLOG_STATE_DIRTY 0x0040 /* Dirty IC log, not ready for ACTIVE status*/
#define XLOG_STATE_IOERROR 0x0080 /* IO error happened in sync'ing log */
+#define XLOG_STATE_IOABORT 0x0100 /* force abort on I/O completion (debug) */
#define XLOG_STATE_ALL 0x7FFF /* All possible valid flags */
#define XLOG_STATE_NOTUSED 0x8000 /* This IC log not being used */
@@ -410,6 +411,8 @@ struct xlog {
/* The following field are used for debugging; need to hold icloglock */
#ifdef DEBUG
void *l_iclog_bak[XLOG_MAX_ICLOGS];
+ /* log record crc error injection factor */
+ uint32_t l_badcrc_factor;
#endif
};
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index c5ecaacdd..be5568839 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -61,6 +61,9 @@ xlog_recover_check_summary(
#else
#define xlog_recover_check_summary(log)
#endif
+STATIC int
+xlog_do_recovery_pass(
+ struct xlog *, xfs_daddr_t, xfs_daddr_t, int, xfs_daddr_t *);
/*
* This structure is used during recovery to record the buf log items which
@@ -868,136 +871,365 @@ validate_head:
}
/*
- * Find the sync block number or the tail of the log.
- *
- * This will be the block number of the last record to have its
- * associated buffers synced to disk. Every log record header has
- * a sync lsn embedded in it. LSNs hold block numbers, so it is easy
- * to get a sync block number. The only concern is to figure out which
- * log record header to believe.
- *
- * The following algorithm uses the log record header with the largest
- * lsn. The entire log record does not need to be valid. We only care
- * that the header is valid.
+ * Seek backwards in the log for log record headers.
*
- * We could speed up search by using current head_blk buffer, but it is not
- * available.
+ * Given a starting log block, walk backwards until we find the provided number
+ * of records or hit the provided tail block. The return value is the number of
+ * records encountered or a negative error code. The log block and buffer
+ * pointer of the last record seen are returned in rblk and rhead respectively.
*/
STATIC int
-xlog_find_tail(
+xlog_rseek_logrec_hdr(
struct xlog *log,
- xfs_daddr_t *head_blk,
- xfs_daddr_t *tail_blk)
+ xfs_daddr_t head_blk,
+ xfs_daddr_t tail_blk,
+ int count,
+ struct xfs_buf *bp,
+ xfs_daddr_t *rblk,
+ struct xlog_rec_header **rhead,
+ bool *wrapped)
{
- xlog_rec_header_t *rhead;
- xlog_op_header_t *op_head;
+ int i;
+ int error;
+ int found = 0;
char *offset = NULL;
- xfs_buf_t *bp;
- int error, i, found;
- xfs_daddr_t umount_data_blk;
- xfs_daddr_t after_umount_blk;
- xfs_lsn_t tail_lsn;
- int hblks;
+ xfs_daddr_t end_blk;
- found = 0;
+ *wrapped = false;
/*
- * Find previous log record
+ * Walk backwards from the head block until we hit the tail or the first
+ * block in the log.
*/
- if ((error = xlog_find_head(log, head_blk)))
- return error;
-
- bp = xlog_get_bp(log, 1);
- if (!bp)
- return -ENOMEM;
- if (*head_blk == 0) { /* special case */
- error = xlog_bread(log, 0, 1, bp, &offset);
+ end_blk = head_blk > tail_blk ? tail_blk : 0;
+ for (i = (int) head_blk - 1; i >= end_blk; i--) {
+ error = xlog_bread(log, i, 1, bp, &offset);
if (error)
- goto done;
+ goto out_error;
- if (xlog_get_cycle(offset) == 0) {
- *tail_blk = 0;
- /* leave all other log inited values alone */
- goto done;
+ if (*(__be32 *) offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
+ *rblk = i;
+ *rhead = (struct xlog_rec_header *) offset;
+ if (++found == count)
+ break;
}
}
/*
- * Search backwards looking for log record header block
+ * If we haven't hit the tail block or the log record header count,
+ * start looking again from the end of the physical log. Note that
+ * callers can pass head == tail if the tail is not yet known.
*/
- ASSERT(*head_blk < INT_MAX);
- for (i = (int)(*head_blk) - 1; i >= 0; i--) {
+ if (tail_blk >= head_blk && found != count) {
+ for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) {
+ error = xlog_bread(log, i, 1, bp, &offset);
+ if (error)
+ goto out_error;
+
+ if (*(__be32 *)offset ==
+ cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
+ *wrapped = true;
+ *rblk = i;
+ *rhead = (struct xlog_rec_header *) offset;
+ if (++found == count)
+ break;
+ }
+ }
+ }
+
+ return found;
+
+out_error:
+ return error;
+}
+
+/*
+ * Seek forward in the log for log record headers.
+ *
+ * Given head and tail blocks, walk forward from the tail block until we find
+ * the provided number of records or hit the head block. The return value is the
+ * number of records encountered or a negative error code. The log block and
+ * buffer pointer of the last record seen are returned in rblk and rhead
+ * respectively.
+ */
+STATIC int
+xlog_seek_logrec_hdr(
+ struct xlog *log,
+ xfs_daddr_t head_blk,
+ xfs_daddr_t tail_blk,
+ int count,
+ struct xfs_buf *bp,
+ xfs_daddr_t *rblk,
+ struct xlog_rec_header **rhead,
+ bool *wrapped)
+{
+ int i;
+ int error;
+ int found = 0;
+ char *offset = NULL;
+ xfs_daddr_t end_blk;
+
+ *wrapped = false;
+
+ /*
+ * Walk forward from the tail block until we hit the head or the last
+ * block in the log.
+ */
+ end_blk = head_blk > tail_blk ? head_blk : log->l_logBBsize - 1;
+ for (i = (int) tail_blk; i <= end_blk; i++) {
error = xlog_bread(log, i, 1, bp, &offset);
if (error)
- goto done;
+ goto out_error;
- if (*(__be32 *)offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
- found = 1;
- break;
+ if (*(__be32 *) offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
+ *rblk = i;
+ *rhead = (struct xlog_rec_header *) offset;
+ if (++found == count)
+ break;
}
}
+
/*
- * If we haven't found the log record header block, start looking
- * again from the end of the physical log. XXXmiken: There should be
- * a check here to make sure we didn't search more than N blocks in
- * the previous code.
+ * If we haven't hit the head block or the log record header count,
+ * start looking again from the start of the physical log.
*/
- if (!found) {
- for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
+ if (tail_blk > head_blk && found != count) {
+ for (i = 0; i < (int) head_blk; i++) {
error = xlog_bread(log, i, 1, bp, &offset);
if (error)
- goto done;
+ goto out_error;
if (*(__be32 *)offset ==
cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
- found = 2;
- break;
+ *wrapped = true;
+ *rblk = i;
+ *rhead = (struct xlog_rec_header *) offset;
+ if (++found == count)
+ break;
}
}
}
- if (!found) {
- xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
- xlog_put_bp(bp);
- ASSERT(0);
- return -EIO;
+
+ return found;
+
+out_error:
+ return error;
+}
+
+/*
+ * Check the log tail for torn writes. This is required when torn writes are
+ * detected at the head and the head had to be walked back to a previous record.
+ * The tail of the previous record must now be verified to ensure the torn
+ * writes didn't corrupt the previous tail.
+ *
+ * Return an error if CRC verification fails as recovery cannot proceed.
+ */
+STATIC int
+xlog_verify_tail(
+ struct xlog *log,
+ xfs_daddr_t head_blk,
+ xfs_daddr_t tail_blk)
+{
+ struct xlog_rec_header *thead;
+ struct xfs_buf *bp;
+ xfs_daddr_t first_bad;
+ int count;
+ int error = 0;
+ bool wrapped;
+ xfs_daddr_t tmp_head;
+
+ bp = xlog_get_bp(log, 1);
+ if (!bp)
+ return -ENOMEM;
+
+ /*
+ * Seek XLOG_MAX_ICLOGS + 1 records past the current tail record to get
+ * a temporary head block that points after the last possible
+ * concurrently written record of the tail.
+ */
+ count = xlog_seek_logrec_hdr(log, head_blk, tail_blk,
+ XLOG_MAX_ICLOGS + 1, bp, &tmp_head, &thead,
+ &wrapped);
+ if (count < 0) {
+ error = count;
+ goto out;
}
- /* find blk_no of tail of log */
- rhead = (xlog_rec_header_t *)offset;
- *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
+ /*
+ * If the call above didn't find XLOG_MAX_ICLOGS + 1 records, we ran
+ * into the actual log head. tmp_head points to the start of the record
+ * so update it to the actual head block.
+ */
+ if (count < XLOG_MAX_ICLOGS + 1)
+ tmp_head = head_blk;
/*
- * Reset log values according to the state of the log when we
- * crashed. In the case where head_blk == 0, we bump curr_cycle
- * one because the next write starts a new cycle rather than
- * continuing the cycle of the last good log record. At this
- * point we have guaranteed that all partial log records have been
- * accounted for. Therefore, we know that the last good log record
- * written was complete and ended exactly on the end boundary
- * of the physical log.
+ * We now have a tail and temporary head block that covers at least
+ * XLOG_MAX_ICLOGS records from the tail. We need to verify that these
+ * records were completely written. Run a CRC verification pass from
+ * tail to head and return the result.
*/
- log->l_prev_block = i;
- log->l_curr_block = (int)*head_blk;
- log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
- if (found == 2)
- log->l_curr_cycle++;
- atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
- atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
- xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
- BBTOB(log->l_curr_block));
- xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
- BBTOB(log->l_curr_block));
+ error = xlog_do_recovery_pass(log, tmp_head, tail_blk,
+ XLOG_RECOVER_CRCPASS, &first_bad);
+
+out:
+ xlog_put_bp(bp);
+ return error;
+}
+
+/*
+ * Detect and trim torn writes from the head of the log.
+ *
+ * Storage without sector atomicity guarantees can result in torn writes in the
+ * log in the event of a crash. Our only means to detect this scenario is via
+ * CRC verification. While we can't always be certain that CRC verification
+ * failure is due to a torn write vs. an unrelated corruption, we do know that
+ * only a certain number (XLOG_MAX_ICLOGS) of log records can be written out at
+ * one time. Therefore, CRC verify up to XLOG_MAX_ICLOGS records at the head of
+ * the log and treat failures in this range as torn writes as a matter of
+ * policy. In the event of CRC failure, the head is walked back to the last good
+ * record in the log and the tail is updated from that record and verified.
+ */
+STATIC int
+xlog_verify_head(
+ struct xlog *log,
+ xfs_daddr_t *head_blk, /* in/out: unverified head */
+ xfs_daddr_t *tail_blk, /* out: tail block */
+ struct xfs_buf *bp,
+ xfs_daddr_t *rhead_blk, /* start blk of last record */
+ struct xlog_rec_header **rhead, /* ptr to last record */
+ bool *wrapped) /* last rec. wraps phys. log */
+{
+ struct xlog_rec_header *tmp_rhead;
+ struct xfs_buf *tmp_bp;
+ xfs_daddr_t first_bad;
+ xfs_daddr_t tmp_rhead_blk;
+ int found;
+ int error;
+ bool tmp_wrapped;
/*
- * Look for unmount record. If we find it, then we know there
- * was a clean unmount. Since 'i' could be the last block in
- * the physical log, we convert to a log block before comparing
- * to the head_blk.
+ * Check the head of the log for torn writes. Search backwards from the
+ * head until we hit the tail or the maximum number of log record I/Os
+ * that could have been in flight at one time. Use a temporary buffer so
+ * we don't trash the rhead/bp pointers from the caller.
+ */
+ tmp_bp = xlog_get_bp(log, 1);
+ if (!tmp_bp)
+ return -ENOMEM;
+ error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk,
+ XLOG_MAX_ICLOGS, tmp_bp, &tmp_rhead_blk,
+ &tmp_rhead, &tmp_wrapped);
+ xlog_put_bp(tmp_bp);
+ if (error < 0)
+ return error;
+
+ /*
+ * Now run a CRC verification pass over the records starting at the
+ * block found above to the current head. If a CRC failure occurs, the
+ * log block of the first bad record is saved in first_bad.
+ */
+ error = xlog_do_recovery_pass(log, *head_blk, tmp_rhead_blk,
+ XLOG_RECOVER_CRCPASS, &first_bad);
+ if (error == -EFSBADCRC) {
+ /*
+ * We've hit a potential torn write. Reset the error and warn
+ * about it.
+ */
+ error = 0;
+ xfs_warn(log->l_mp,
+"Torn write (CRC failure) detected at log block 0x%llx. Truncating head block from 0x%llx.",
+ first_bad, *head_blk);
+
+ /*
+ * Get the header block and buffer pointer for the last good
+ * record before the bad record.
+ *
+ * Note that xlog_find_tail() clears the blocks at the new head
+ * (i.e., the records with invalid CRC) if the cycle number
+ * matches the current cycle.
+ */
+ found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, bp,
+ rhead_blk, rhead, wrapped);
+ if (found < 0)
+ return found;
+ if (found == 0) /* XXX: right thing to do here? */
+ return -EIO;
+
+ /*
+ * Reset the head block to the starting block of the first bad
+ * log record and set the tail block based on the last good
+ * record.
+ *
+ * Bail out if the updated head/tail match as this indicates
+ * possible corruption outside of the acceptable
+ * (XLOG_MAX_ICLOGS) range. This is a job for xfs_repair...
+ */
+ *head_blk = first_bad;
+ *tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
+ if (*head_blk == *tail_blk) {
+ ASSERT(0);
+ return 0;
+ }
+
+ /*
+ * Now verify the tail based on the updated head. This is
+ * required because the torn writes trimmed from the head could
+ * have been written over the tail of a previous record. Return
+ * any errors since recovery cannot proceed if the tail is
+ * corrupt.
+ *
+ * XXX: This leaves a gap in truly robust protection from torn
+ * writes in the log. If the head is behind the tail, the tail
+ * pushes forward to create some space and then a crash occurs
+ * causing the writes into the previous record's tail region to
+ * tear, log recovery isn't able to recover.
+ *
+ * How likely is this to occur? If possible, can we do something
+ * more intelligent here? Is it safe to push the tail forward if
+ * we can determine that the tail is within the range of the
+ * torn write (e.g., the kernel can only overwrite the tail if
+ * it has actually been pushed forward)? Alternatively, could we
+ * somehow prevent this condition at runtime?
+ */
+ error = xlog_verify_tail(log, *head_blk, *tail_blk);
+ }
+
+ return error;
+}
+
+/*
+ * Check whether the head of the log points to an unmount record. In other
+ * words, determine whether the log is clean. If so, update the in-core state
+ * appropriately.
+ */
+static int
+xlog_check_unmount_rec(
+ struct xlog *log,
+ xfs_daddr_t *head_blk,
+ xfs_daddr_t *tail_blk,
+ struct xlog_rec_header *rhead,
+ xfs_daddr_t rhead_blk,
+ struct xfs_buf *bp,
+ bool *clean)
+{
+ struct xlog_op_header *op_head;
+ xfs_daddr_t umount_data_blk;
+ xfs_daddr_t after_umount_blk;
+ int hblks;
+ int error;
+ char *offset;
+
+ *clean = false;
+
+ /*
+ * Look for unmount record. If we find it, then we know there was a
+ * clean unmount. Since rhead_blk could be the last block in the physical
+ * log, we convert to a log block before comparing to the head_blk.
*
- * Save the current tail lsn to use to pass to
- * xlog_clear_stale_blocks() below. We won't want to clear the
- * unmount record if there is one, so we pass the lsn of the
- * unmount record rather than the block after it.
+ * Save the current tail lsn to use to pass to xlog_clear_stale_blocks()
+ * below. We won't want to clear the unmount record if there is one, so
+ * we pass the lsn of the unmount record rather than the block after it.
*/
if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
int h_size = be32_to_cpu(rhead->h_size);
@@ -1014,22 +1246,22 @@ xlog_find_tail(
} else {
hblks = 1;
}
- after_umount_blk = (i + hblks + (int)
- BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize;
- tail_lsn = atomic64_read(&log->l_tail_lsn);
+ after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
+ after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
if (*head_blk == after_umount_blk &&
be32_to_cpu(rhead->h_num_logops) == 1) {
- umount_data_blk = (i + hblks) % log->l_logBBsize;
+ umount_data_blk = rhead_blk + hblks;
+ umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
if (error)
- goto done;
+ return error;
- op_head = (xlog_op_header_t *)offset;
+ op_head = (struct xlog_op_header *)offset;
if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
/*
- * Set tail and last sync so that newly written
- * log records will point recovery to after the
- * current unmount record.
+ * Set tail and last sync so that newly written log
+ * records will point recovery to after the current
+ * unmount record.
*/
xlog_assign_atomic_lsn(&log->l_tail_lsn,
log->l_curr_cycle, after_umount_blk);
@@ -1037,16 +1269,166 @@ xlog_find_tail(
log->l_curr_cycle, after_umount_blk);
*tail_blk = after_umount_blk;
- /*
- * Note that the unmount was clean. If the unmount
- * was not clean, we need to know this to rebuild the
- * superblock counters from the perag headers if we
- * have a filesystem using non-persistent counters.
- */
- log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+ *clean = true;
}
}
+ return 0;
+}
+
+/*
+ * Load the in-core log state from the record header found at the head of
+ * the log: previous/current block, current cycle, tail and last-sync LSNs,
+ * and both grant heads.
+ */
+static void
+xlog_set_state(
+	struct xlog		*log,
+	xfs_daddr_t		head_blk,
+	struct xlog_rec_header	*rhead,
+	xfs_daddr_t		rhead_blk,
+	bool			bump_cycle)
+{
+	/*
+	 * Rebuild the log values to match the state of the log at the time
+	 * of the crash. When head_blk == 0 the caller asks for the cycle to
+	 * be bumped by one: the next write opens a new cycle instead of
+	 * continuing the cycle of the last good record. All partial log
+	 * records have been accounted for by now, so the last good record
+	 * was complete and ended exactly on the end boundary of the
+	 * physical log.
+	 */
+	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle) + (bump_cycle ? 1 : 0);
+	log->l_curr_block = (int)head_blk;
+	log->l_prev_block = rhead_blk;
+
+	/* Tail and last-sync LSNs come straight from the record header. */
+	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
+	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
+
+	/* Both grant heads start at the current cycle/block position. */
+	xlog_assign_grant_head(&log->l_reserve_head.grant,
+			       log->l_curr_cycle,
+			       BBTOB(log->l_curr_block));
+	xlog_assign_grant_head(&log->l_write_head.grant,
+			       log->l_curr_cycle,
+			       BBTOB(log->l_curr_block));
+}
+
+/*
+ * Find the sync block number or the tail of the log.
+ *
+ * This will be the block number of the last record to have its
+ * associated buffers synced to disk. Every log record header has
+ * a sync lsn embedded in it. LSNs hold block numbers, so it is easy
+ * to get a sync block number. The only concern is to figure out which
+ * log record header to believe.
+ *
+ * The following algorithm uses the log record header with the largest
+ * lsn. The entire log record does not need to be valid. We only care
+ * that the header is valid.
+ *
+ * We could speed up search by using current head_blk buffer, but it is not
+ * available.
+ */
+STATIC int
+xlog_find_tail(
+	struct xlog		*log,
+	xfs_daddr_t		*head_blk,
+	xfs_daddr_t		*tail_blk)
+{
+	xlog_rec_header_t	*rhead;
+	char			*offset = NULL;
+	xfs_buf_t		*bp;
+	int			error;
+	xfs_daddr_t		rhead_blk;
+	xfs_lsn_t		tail_lsn;
+	bool			wrapped = false;
+	bool			clean = false;
+
+	/*
+	 * Find previous log record. No buffer has been allocated yet, so a
+	 * plain return is fine here.
+	 */
+	error = xlog_find_head(log, head_blk);
+	if (error)
+		return error;
+	ASSERT(*head_blk < INT_MAX);
+
+	bp = xlog_get_bp(log, 1);
+	if (!bp)
+		return -ENOMEM;
+	if (*head_blk == 0) {				/* special case */
+		error = xlog_bread(log, 0, 1, bp, &offset);
+		if (error)
+			goto done;
+
+		if (xlog_get_cycle(offset) == 0) {
+			*tail_blk = 0;
+			/* leave all other log inited values alone */
+			goto done;
+		}
+	}
+
+	/*
+	 * Search backwards through the log looking for the log record header
+	 * block. This wraps all the way back around to the head so something is
+	 * seriously wrong if we can't find it.
+	 *
+	 * bp is live from here on, so failures must leave through the common
+	 * exit label like every other error path below rather than returning
+	 * directly, otherwise the buffer is never released.
+	 */
+	error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp,
+				      &rhead_blk, &rhead, &wrapped);
+	if (error < 0)
+		goto done;
+	if (!error) {
+		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
+		error = -EIO;
+		goto done;
+	}
+	*tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
+
+	/*
+	 * Set the log state based on the current head record.
+	 */
+	xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped);
+	tail_lsn = atomic64_read(&log->l_tail_lsn);
+
+	/*
+	 * Look for an unmount record at the head of the log. This sets the log
+	 * state to determine whether recovery is necessary.
+	 */
+	error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
+				       rhead_blk, bp, &clean);
+	if (error)
+		goto done;
+
+	/*
+	 * Verify the log head if the log is not clean (e.g., we have anything
+	 * but an unmount record at the head). This uses CRC verification to
+	 * detect and trim torn writes. If discovered, CRC failures are
+	 * considered torn writes and the log head is trimmed accordingly.
+	 *
+	 * Note that we can only run CRC verification when the log is dirty
+	 * because there's no guarantee that the log data behind an unmount
+	 * record is compatible with the current architecture.
+	 */
+	if (!clean) {
+		xfs_daddr_t	orig_head = *head_blk;
+
+		error = xlog_verify_head(log, head_blk, tail_blk, bp,
+					 &rhead_blk, &rhead, &wrapped);
+		if (error)
+			goto done;
+
+		/* update in-core state again if the head changed */
+		if (*head_blk != orig_head) {
+			xlog_set_state(log, *head_blk, rhead, rhead_blk,
+				       wrapped);
+			tail_lsn = atomic64_read(&log->l_tail_lsn);
+			error = xlog_check_unmount_rec(log, head_blk, tail_blk,
+						       rhead, rhead_blk, bp,
+						       &clean);
+			if (error)
+				goto done;
+		}
+	}
+
+	/*
+	 * Note that the unmount was clean. If the unmount was not clean, we
+	 * need to know this to rebuild the superblock counters from the perag
+	 * headers if we have a filesystem using non-persistent counters.
+	 */
+	if (clean)
+		log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+
/*
* Make sure that there are no blocks in front of the head
* with the same cycle number as the head. This can happen
@@ -3204,6 +3586,7 @@ xlog_recover_dquot_ra_pass2(
struct xfs_disk_dquot *recddq;
struct xfs_dq_logformat *dq_f;
uint type;
+ int len;
if (mp->m_qflags == 0)
@@ -3224,8 +3607,12 @@ xlog_recover_dquot_ra_pass2(
ASSERT(dq_f);
ASSERT(dq_f->qlf_len == 1);
- xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno,
- XFS_FSB_TO_BB(mp, dq_f->qlf_len), NULL);
+ len = XFS_FSB_TO_BB(mp, dq_f->qlf_len);
+ if (xlog_peek_buffer_cancelled(log, dq_f->qlf_blkno, len, 0))
+ return;
+
+ xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno, len,
+ &xfs_dquot_buf_ra_ops);
}
STATIC void
@@ -4118,25 +4505,68 @@ xlog_recover_process_iunlinks(
mp->m_dmevmask = mp_dmevmask;
}
+/*
+ * Restore the first 32-bit word of every basic block in the record data at
+ * dp from the cycle-data arrays saved in the record header(s). Always
+ * returns 0.
+ */
+STATIC int
+xlog_unpack_data(
+	struct xlog_rec_header	*rhead,
+	char			*dp,
+	struct xlog		*log)
+{
+	const int		per_hdr = XLOG_HEADER_CYCLE_SIZE / BBSIZE;
+	const int		nbblks = BTOBB(be32_to_cpu(rhead->h_len));
+	const int		in_first = nbblks < per_hdr ? nbblks : per_hdr;
+	xlog_in_core_2_t	*xhdr;
+	int			blk;
+
+	/* Blocks covered by the cycle data of the base record header. */
+	for (blk = 0; blk < in_first; blk++, dp += BBSIZE)
+		*(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[blk];
+
+	/* Only v2 logs carry extended headers with further cycle data. */
+	if (!xfs_sb_version_haslogv2(&log->l_mp->m_sb))
+		return 0;
+
+	/* Remaining blocks: pick the extended header and the slot in it. */
+	xhdr = (xlog_in_core_2_t *)rhead;
+	for ( ; blk < nbblks; blk++, dp += BBSIZE)
+		*(__be32 *)dp =
+			xhdr[blk / per_hdr].hic_xheader.xh_cycle_data[blk % per_hdr];
+
+	return 0;
+}
+
/*
- * Upack the log buffer data and crc check it. If the check fails, issue a
- * warning if and only if the CRC in the header is non-zero. This makes the
- * check an advisory warning, and the zero CRC check will prevent failure
- * warnings from being emitted when upgrading the kernel from one that does not
- * add CRCs by default.
- *
- * When filesystems are CRC enabled, this CRC mismatch becomes a fatal log
- * corruption failure
+ * CRC check, unpack and process a log record.
*/
STATIC int
-xlog_unpack_data_crc(
+xlog_recover_process(
+ struct xlog *log,
+ struct hlist_head rhash[],
struct xlog_rec_header *rhead,
char *dp,
- struct xlog *log)
+ int pass)
{
+ int error;
__le32 crc;
crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));
+
+ /*
+ * Nothing else to do if this is a CRC verification pass: only the
+ * checksum is checked. Unfortunately, mkfs always sets h_crc to 0, so
+ * a record with a zero h_crc must be considered valid even on v5
+ * supers. Otherwise, return -EFSBADCRC on a mismatch so the callers up
+ * the stack know precisely what failed.
+ */
+ if (pass == XLOG_RECOVER_CRCPASS) {
+ if (rhead->h_crc && crc != rhead->h_crc)
+ return -EFSBADCRC;
+ return 0;
+ }
+
+ /*
+ * We're in the normal recovery path. Issue a warning if and only if the
+ * CRC in the header is non-zero. This is an advisory warning and the
+ * zero CRC check prevents warnings from being emitted when upgrading
+ * the kernel from one that does not add CRCs by default.
+ */
if (crc != rhead->h_crc) {
if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
xfs_alert(log->l_mp,
@@ -4147,47 +4577,18 @@ xlog_unpack_data_crc(
}
/*
- * If we've detected a log record corruption, then we can't
- * recover past this point. Abort recovery if we are enforcing
- * CRC protection by punting an error back up the stack.
+ * If the filesystem is CRC enabled, this mismatch becomes a
+ * fatal log corruption failure.
*/
if (xfs_sb_version_hascrc(&log->l_mp->m_sb))
return -EFSCORRUPTED;
}
- return 0;
-}
-
-STATIC int
-xlog_unpack_data(
- struct xlog_rec_header *rhead,
- char *dp,
- struct xlog *log)
-{
- int i, j, k;
- int error;
-
- error = xlog_unpack_data_crc(rhead, dp, log);
+ error = xlog_unpack_data(rhead, dp, log);
if (error)
return error;
- for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
- i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
- *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i];
- dp += BBSIZE;
- }
-
- if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
- xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead;
- for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
- j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
- k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
- *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
- dp += BBSIZE;
- }
- }
-
- return 0;
+ return xlog_recover_process_data(log, rhash, rhead, dp, pass);
}
STATIC int
@@ -4239,18 +4640,21 @@ xlog_do_recovery_pass(
struct xlog *log,
xfs_daddr_t head_blk,
xfs_daddr_t tail_blk,
- int pass)
+ int pass,
+ xfs_daddr_t *first_bad) /* out: first bad log rec */
{
xlog_rec_header_t *rhead;
xfs_daddr_t blk_no;
+ xfs_daddr_t rhead_blk;
char *offset;
xfs_buf_t *hbp, *dbp;
- int error = 0, h_size;
+ int error = 0, h_size, h_len;
int bblks, split_bblks;
int hblks, split_hblks, wrapped_hblks;
struct hlist_head rhash[XLOG_RHASH_SIZE];
ASSERT(head_blk != tail_blk);
+ rhead_blk = 0;
/*
* Read the header of the tail block and get the iclog buffer size from
@@ -4274,7 +4678,31 @@ xlog_do_recovery_pass(
error = xlog_valid_rec_header(log, rhead, tail_blk);
if (error)
goto bread_err1;
+
+ /*
+ * xfsprogs has a bug where record length is based on lsunit but
+ * h_size (iclog size) is hardcoded to 32k. Now that we
+ * unconditionally CRC verify the unmount record, this means the
+ * log buffer can be too small for the record and cause an
+ * overrun.
+ *
+ * Detect this condition here. Use lsunit for the buffer size as
+ * long as this looks like the mkfs case. Otherwise, return an
+ * error to avoid a buffer overrun.
+ */
h_size = be32_to_cpu(rhead->h_size);
+ h_len = be32_to_cpu(rhead->h_len);
+ if (h_len > h_size) {
+ if (h_len <= log->l_mp->m_logbsize &&
+ be32_to_cpu(rhead->h_num_logops) == 1) {
+ xfs_warn(log->l_mp,
+ "invalid iclog size (%d bytes), using lsunit (%d bytes)",
+ h_size, log->l_mp->m_logbsize);
+ h_size = log->l_mp->m_logbsize;
+ } else {
+ /*
+ * The header buffer is already allocated here, so leave
+ * through bread_err1 (as the xlog_valid_rec_header()
+ * failure just above does) instead of returning directly
+ * and leaking it.
+ */
+ error = -EFSCORRUPTED;
+ goto bread_err1;
+ }
+ }
+
if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) &&
(h_size > XLOG_HEADER_CYCLE_SIZE)) {
hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
@@ -4301,7 +4729,7 @@ xlog_do_recovery_pass(
}
memset(rhash, 0, sizeof(rhash));
- blk_no = tail_blk;
+ blk_no = rhead_blk = tail_blk;
if (tail_blk > head_blk) {
/*
* Perform recovery around the end of the physical log.
@@ -4408,19 +4836,18 @@ xlog_do_recovery_pass(
goto bread_err2;
}
- error = xlog_unpack_data(rhead, offset, log);
+ error = xlog_recover_process(log, rhash, rhead, offset,
+ pass);
if (error)
goto bread_err2;
- error = xlog_recover_process_data(log, rhash,
- rhead, offset, pass);
- if (error)
- goto bread_err2;
blk_no += bblks;
+ rhead_blk = blk_no;
}
ASSERT(blk_no >= log->l_logBBsize);
blk_no -= log->l_logBBsize;
+ rhead_blk = blk_no;
}
/* read first part of physical log */
@@ -4441,21 +4868,22 @@ xlog_do_recovery_pass(
if (error)
goto bread_err2;
- error = xlog_unpack_data(rhead, offset, log);
+ error = xlog_recover_process(log, rhash, rhead, offset, pass);
if (error)
goto bread_err2;
- error = xlog_recover_process_data(log, rhash,
- rhead, offset, pass);
- if (error)
- goto bread_err2;
blk_no += bblks + hblks;
+ rhead_blk = blk_no;
}
bread_err2:
xlog_put_bp(dbp);
bread_err1:
xlog_put_bp(hbp);
+
+ if (error && first_bad)
+ *first_bad = rhead_blk;
+
return error;
}
@@ -4493,7 +4921,7 @@ xlog_do_log_recovery(
INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
error = xlog_do_recovery_pass(log, head_blk, tail_blk,
- XLOG_RECOVER_PASS1);
+ XLOG_RECOVER_PASS1, NULL);
if (error != 0) {
kmem_free(log->l_buf_cancel_table);
log->l_buf_cancel_table = NULL;
@@ -4504,7 +4932,7 @@ xlog_do_log_recovery(
* When it is complete free the table of buf cancel items.
*/
error = xlog_do_recovery_pass(log, head_blk, tail_blk,
- XLOG_RECOVER_PASS2);
+ XLOG_RECOVER_PASS2, NULL);
#ifdef DEBUG
if (!error) {
int i;
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index dc6221942..ade236e90 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -42,11 +42,11 @@ xfs_break_layouts(
while ((error = break_layout(inode, false) == -EWOULDBLOCK)) {
xfs_iunlock(ip, *iolock);
if (with_imutex && (*iolock & XFS_IOLOCK_EXCL))
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
error = break_layout(inode, true);
*iolock = XFS_IOLOCK_EXCL;
if (with_imutex)
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
xfs_ilock(ip, *iolock);
}
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index ab1bac6a3..be02a68b2 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -766,7 +766,6 @@ xfs_growfs_rt_alloc(
{
xfs_fileoff_t bno; /* block number in file */
struct xfs_buf *bp; /* temporary buffer for zeroing */
- int committed; /* transaction committed flag */
xfs_daddr_t d; /* disk block address */
int error; /* error return value */
xfs_fsblock_t firstblock;/* first block allocated in xaction */
@@ -811,7 +810,7 @@ xfs_growfs_rt_alloc(
/*
* Free any blocks freed up in the transaction, then commit.
*/
- error = xfs_bmap_finish(&tp, &flist, &committed);
+ error = xfs_bmap_finish(&tp, &flist, NULL);
if (error)
goto out_bmap_cancel;
error = xfs_trans_commit(tp);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 36bd8825b..59c9b7bd9 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -137,7 +137,7 @@ static const match_table_t tokens = {
};
-STATIC unsigned long
+STATIC int
suffix_kstrtoint(char *s, unsigned int base, int *res)
{
int last, shift_left_factor = 0, _res;
@@ -1714,8 +1714,8 @@ xfs_init_zones(void)
xfs_inode_zone =
kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
- KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD,
- xfs_fs_inode_init_once);
+ KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD |
+ KM_ZONE_ACCOUNT, xfs_fs_inode_init_once);
if (!xfs_inode_zone)
goto out_destroy_efi_zone;
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 996481eeb..b44284c1a 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -178,7 +178,6 @@ xfs_symlink(
struct xfs_bmap_free free_list;
xfs_fsblock_t first_block;
bool unlock_dp_on_error = false;
- int committed;
xfs_fileoff_t first_fsb;
xfs_filblks_t fs_blocks;
int nmaps;
@@ -387,7 +386,7 @@ xfs_symlink(
xfs_trans_set_sync(tp);
}
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto out_bmap_cancel;
@@ -434,7 +433,6 @@ xfs_inactive_symlink_rmt(
struct xfs_inode *ip)
{
xfs_buf_t *bp;
- int committed;
int done;
int error;
xfs_fsblock_t first_block;
@@ -510,16 +508,10 @@ xfs_inactive_symlink_rmt(
/*
* Commit the first transaction. This logs the EFI and the inode.
*/
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, ip);
if (error)
goto error_bmap_cancel;
/*
- * The transaction must have been committed, since there were
- * actually extents freed by xfs_bunmapi. See xfs_bmap_finish.
- * The new tp has the extent freeing and EFDs.
- */
- ASSERT(committed);
- /*
* The first xact was committed, so add the inode to the new one.
* Mark it dirty so it will be logged and moved forward in the log as
* part of every commit.
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index ee70f5dec..641d625eb 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -255,11 +255,47 @@ write_grant_head_show(
}
XFS_SYSFS_ATTR_RO(write_grant_head);
+#ifdef DEBUG
+/*
+ * sysfs store handler: parse buf as an unsigned integer (base auto-detected)
+ * and save it in the log's l_badcrc_factor. Returns count on success or the
+ * kstrtouint() error code.
+ */
+STATIC ssize_t
+log_badcrc_factor_store(
+	struct kobject	*kobject,
+	const char	*buf,
+	size_t		count)
+{
+	struct xlog	*log = to_xlog(kobject);
+	uint32_t	factor;
+	int		error;
+
+	error = kstrtouint(buf, 0, &factor);
+	if (!error) {
+		log->l_badcrc_factor = factor;
+		return count;
+	}
+
+	return error;
+}
+
+/*
+ * sysfs show handler: print the log's l_badcrc_factor followed by a
+ * newline into buf, bounded by PAGE_SIZE.
+ */
+STATIC ssize_t
+log_badcrc_factor_show(
+	struct kobject	*kobject,
+	char		*buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			to_xlog(kobject)->l_badcrc_factor);
+}
+
+XFS_SYSFS_ATTR_RW(log_badcrc_factor);
+#endif /* DEBUG */
+
static struct attribute *xfs_log_attrs[] = {
ATTR_LIST(log_head_lsn),
ATTR_LIST(log_tail_lsn),
ATTR_LIST(reserve_grant_head),
ATTR_LIST(write_grant_head),
+#ifdef DEBUG
+ ATTR_LIST(log_badcrc_factor),
+#endif
NULL,
};
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 877079eb0..391d797cb 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1222,6 +1222,32 @@ DEFINE_PAGE_EVENT(xfs_writepage);
DEFINE_PAGE_EVENT(xfs_releasepage);
DEFINE_PAGE_EVENT(xfs_invalidatepage);
+/*
+ * Event class for page read tracepoints. Each event records the device
+ * number of the inode's superblock, the inode number and the nr_pages
+ * argument, and prints them as "dev major:minor ino 0x... nr_pages N".
+ */
+DECLARE_EVENT_CLASS(xfs_readpage_class,
+ TP_PROTO(struct inode *inode, int nr_pages),
+ TP_ARGS(inode, nr_pages),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(int, nr_pages)
+ ),
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->nr_pages = nr_pages;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx nr_pages %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->nr_pages)
+)
+
+/*
+ * Define a tracepoint called 'name' in xfs_readpage_class; it takes the
+ * same (inode, nr_pages) arguments as the class.
+ */
+#define DEFINE_READPAGE_EVENT(name) \
+DEFINE_EVENT(xfs_readpage_class, name, \
+ TP_PROTO(struct inode *inode, int nr_pages), \
+ TP_ARGS(inode, nr_pages))
+DEFINE_READPAGE_EVENT(xfs_vm_readpage);
+DEFINE_READPAGE_EVENT(xfs_vm_readpages);
+
DECLARE_EVENT_CLASS(xfs_imap_class,
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
int type, struct xfs_bmbt_irec *irec),
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index ce78534a0..995170194 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -572,12 +572,16 @@ xfs_quota_warn(
struct xfs_dquot *dqp,
int type)
{
- /* no warnings for project quotas - we just return ENOSPC later */
+ enum quota_type qtype;
+
if (dqp->dq_flags & XFS_DQ_PROJ)
- return;
- quota_send_warning(make_kqid(&init_user_ns,
- (dqp->dq_flags & XFS_DQ_USER) ?
- USRQUOTA : GRPQUOTA,
+ qtype = PRJQUOTA;
+ else if (dqp->dq_flags & XFS_DQ_USER)
+ qtype = USRQUOTA;
+ else
+ qtype = GRPQUOTA;
+
+ quota_send_warning(make_kqid(&init_user_ns, qtype,
be32_to_cpu(dqp->q_core.d_id)),
mp->m_super->s_dev, type);
}
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 839b35ca2..110f1d7d8 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -39,9 +39,6 @@ xfs_xattr_get(const struct xattr_handler *handler, struct dentry *dentry,
struct xfs_inode *ip = XFS_I(d_inode(dentry));
int error, asize = size;
- if (strcmp(name, "") == 0)
- return -EINVAL;
-
/* Convert Linux syscall to XFS internal ATTR flags */
if (!size) {
xflags |= ATTR_KERNOVAL;
@@ -84,9 +81,6 @@ xfs_xattr_set(const struct xattr_handler *handler, struct dentry *dentry,
struct xfs_inode *ip = XFS_I(d_inode(dentry));
int error;
- if (strcmp(name, "") == 0)
- return -EINVAL;
-
/* Convert Linux syscall to XFS internal ATTR flags */
if (flags & XATTR_CREATE)
xflags |= ATTR_CREATE;
@@ -135,47 +129,19 @@ const struct xattr_handler *xfs_xattr_handlers[] = {
NULL
};
-static unsigned int xfs_xattr_prefix_len(int flags)
-{
- if (flags & XFS_ATTR_SECURE)
- return sizeof("security");
- else if (flags & XFS_ATTR_ROOT)
- return sizeof("trusted");
- else
- return sizeof("user");
-}
-
-static const char *xfs_xattr_prefix(int flags)
-{
- if (flags & XFS_ATTR_SECURE)
- return xfs_xattr_security_handler.prefix;
- else if (flags & XFS_ATTR_ROOT)
- return xfs_xattr_trusted_handler.prefix;
- else
- return xfs_xattr_user_handler.prefix;
-}
-
static int
-xfs_xattr_put_listent(
+__xfs_xattr_put_listent(
struct xfs_attr_list_context *context,
- int flags,
- unsigned char *name,
- int namelen,
- int valuelen,
- unsigned char *value)
+ char *prefix,
+ int prefix_len,
+ unsigned char *name,
+ int namelen)
{
- unsigned int prefix_len = xfs_xattr_prefix_len(flags);
char *offset;
int arraytop;
- ASSERT(context->count >= 0);
-
- /*
- * Only show root namespace entries if we are actually allowed to
- * see them.
- */
- if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN))
- return 0;
+ if (!context->alist)
+ goto compute_size;
arraytop = context->count + prefix_len + namelen + 1;
if (arraytop > context->firstu) {
@@ -183,17 +149,19 @@ xfs_xattr_put_listent(
return 1;
}
offset = (char *)context->alist + context->count;
- strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
+ strncpy(offset, prefix, prefix_len);
offset += prefix_len;
strncpy(offset, (char *)name, namelen); /* real name */
offset += namelen;
*offset = '\0';
+
+compute_size:
context->count += prefix_len + namelen + 1;
return 0;
}
static int
-xfs_xattr_put_listent_sizes(
+xfs_xattr_put_listent(
struct xfs_attr_list_context *context,
int flags,
unsigned char *name,
@@ -201,24 +169,55 @@ xfs_xattr_put_listent_sizes(
int valuelen,
unsigned char *value)
{
- context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
- return 0;
-}
+ char *prefix;
+ int prefix_len;
-static int
-list_one_attr(const char *name, const size_t len, void *data,
- size_t size, ssize_t *result)
-{
- char *p = data + *result;
+ ASSERT(context->count >= 0);
- *result += len;
- if (!size)
- return 0;
- if (*result > size)
- return -ERANGE;
+ if (flags & XFS_ATTR_ROOT) {
+#ifdef CONFIG_XFS_POSIX_ACL
+ if (namelen == SGI_ACL_FILE_SIZE &&
+ strncmp(name, SGI_ACL_FILE,
+ SGI_ACL_FILE_SIZE) == 0) {
+ int ret = __xfs_xattr_put_listent(
+ context, XATTR_SYSTEM_PREFIX,
+ XATTR_SYSTEM_PREFIX_LEN,
+ XATTR_POSIX_ACL_ACCESS,
+ strlen(XATTR_POSIX_ACL_ACCESS));
+ if (ret)
+ return ret;
+ } else if (namelen == SGI_ACL_DEFAULT_SIZE &&
+ strncmp(name, SGI_ACL_DEFAULT,
+ SGI_ACL_DEFAULT_SIZE) == 0) {
+ int ret = __xfs_xattr_put_listent(
+ context, XATTR_SYSTEM_PREFIX,
+ XATTR_SYSTEM_PREFIX_LEN,
+ XATTR_POSIX_ACL_DEFAULT,
+ strlen(XATTR_POSIX_ACL_DEFAULT));
+ if (ret)
+ return ret;
+ }
+#endif
- strcpy(p, name);
- return 0;
+ /*
+ * Only show root namespace entries if we are actually allowed to
+ * see them.
+ */
+ if (!capable(CAP_SYS_ADMIN))
+ return 0;
+
+ prefix = XATTR_TRUSTED_PREFIX;
+ prefix_len = XATTR_TRUSTED_PREFIX_LEN;
+ } else if (flags & XFS_ATTR_SECURE) {
+ prefix = XATTR_SECURITY_PREFIX;
+ prefix_len = XATTR_SECURITY_PREFIX_LEN;
+ } else {
+ prefix = XATTR_USER_PREFIX;
+ prefix_len = XATTR_USER_PREFIX_LEN;
+ }
+
+ return __xfs_xattr_put_listent(context, prefix, prefix_len, name,
+ namelen);
}
ssize_t
@@ -227,7 +226,6 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
struct xfs_attr_list_context context;
struct attrlist_cursor_kern cursor = { 0 };
struct inode *inode = d_inode(dentry);
- int error;
/*
* First read the regular on-disk attributes.
@@ -236,37 +234,14 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
context.dp = XFS_I(inode);
context.cursor = &cursor;
context.resynch = 1;
- context.alist = data;
+ context.alist = size ? data : NULL;
context.bufsize = size;
context.firstu = context.bufsize;
-
- if (size)
- context.put_listent = xfs_xattr_put_listent;
- else
- context.put_listent = xfs_xattr_put_listent_sizes;
+ context.put_listent = xfs_xattr_put_listent;
xfs_attr_list_int(&context);
if (context.count < 0)
return -ERANGE;
- /*
- * Then add the two synthetic ACL attributes.
- */
- if (posix_acl_access_exists(inode)) {
- error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
- strlen(POSIX_ACL_XATTR_ACCESS) + 1,
- data, size, &context.count);
- if (error)
- return error;
- }
-
- if (posix_acl_default_exists(inode)) {
- error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
- strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
- data, size, &context.count);
- if (error)
- return error;
- }
-
return context.count;
}